RubyGems - mspire - Versions diffs - 0.5.0 → 0.6.1 - Mend

mspire 0.5.0 → 0.6.1

Files changed (107) hide show

data/README.rdoc +24 -0
data/Rakefile +51 -0
data/VERSION +1 -0
data/lib/cv/description.rb +18 -0
data/lib/cv/param.rb +33 -0
data/lib/cv.rb +3 -0
data/lib/io/bookmark.rb +13 -0
data/lib/merge.rb +7 -0
data/lib/ms/cvlist.rb +76 -0
data/lib/ms/digester.rb +245 -0
data/lib/ms/fasta.rb +86 -0
data/lib/ms/ident/peptide/db.rb +243 -0
data/lib/ms/ident/peptide.rb +72 -0
data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
data/lib/ms/ident/peptide_hit.rb +26 -0
data/lib/ms/ident/pepxml/modifications.rb +83 -0
data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
data/lib/ms/ident/pepxml/parameters.rb +14 -0
data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
data/lib/ms/ident/pepxml/search_database.rb +49 -0
data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
data/lib/ms/ident/pepxml/search_hit.rb +144 -0
data/lib/ms/ident/pepxml/search_result.rb +35 -0
data/lib/ms/ident/pepxml/search_summary.rb +92 -0
data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
data/lib/ms/ident/pepxml.rb +112 -0
data/lib/ms/ident/protein.rb +33 -0
data/lib/ms/ident/protein_group.rb +80 -0
data/lib/ms/ident/search.rb +114 -0
data/lib/ms/ident.rb +37 -0
data/lib/ms/isotope/aa.rb +59 -0
data/lib/ms/mascot.rb +6 -0
data/lib/ms/mass/aa.rb +79 -0
data/lib/ms/mass.rb +55 -0
data/lib/ms/mzml/index_list.rb +98 -0
data/lib/ms/mzml/plms1.rb +34 -0
data/lib/ms/mzml.rb +197 -0
data/lib/ms/obo.rb +38 -0
data/lib/ms/plms1.rb +156 -0
data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
data/lib/ms/quant/qspec.rb +112 -0
data/lib/ms/spectrum.rb +154 -8
data/lib/ms.rb +3 -10
data/lib/msplat.rb +2 -0
data/lib/obo/ims.rb +5 -0
data/lib/obo/ms.rb +7 -0
data/lib/obo/ontology.rb +41 -0
data/lib/obo/unit.rb +5 -0
data/lib/openany.rb +23 -0
data/lib/write_file_or_string.rb +18 -0
data/obo/ims.obo +562 -0
data/obo/ms.obo +11677 -0
data/obo/unit.obo +2563 -0
data/spec/ms/cvlist_spec.rb +60 -0
data/spec/ms/digester_spec.rb +351 -0
data/spec/ms/fasta_spec.rb +100 -0
data/spec/ms/ident/peptide/db_spec.rb +108 -0
data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
data/spec/ms/ident/pepxml_spec.rb +442 -0
data/spec/ms/ident/protein_group_spec.rb +68 -0
data/spec/ms/mass_spec.rb +8 -0
data/spec/ms/mzml/index_list_spec.rb +122 -0
data/spec/ms/mzml/plms1_spec.rb +62 -0
data/spec/ms/mzml_spec.rb +50 -0
data/spec/ms/plms1_spec.rb +38 -0
data/spec/ms/quant/qspec_spec.rb +25 -0
data/spec/msplat_spec.rb +24 -0
data/spec/obo_spec.rb +25 -0
data/spec/spec_helper.rb +25 -0
data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
data/spec/testfiles/plms1/output.key +0 -0
metadata +157 -40
data/README +0 -77
data/changelog.txt +0 -196
data/lib/ms/calc.rb +0 -32
data/lib/ms/data/interleaved.rb +0 -60
data/lib/ms/data/lazy_io.rb +0 -73
data/lib/ms/data/lazy_string.rb +0 -15
data/lib/ms/data/simple.rb +0 -59
data/lib/ms/data/transposed.rb +0 -41
data/lib/ms/data.rb +0 -57
data/lib/ms/format/format_error.rb +0 -12
data/lib/ms/support/binary_search.rb +0 -126

data/lib/ms/ident/pepxml/sample_enzyme.rb ADDED Viewed

@@ -0,0 +1,165 @@
+require 'merge'
+require 'nokogiri'
+require 'strscan'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+class MS::Ident::Pepxml::SampleEnzyme
+  include Merge
+  # an identifier
+  attr_accessor :name
+  # amino acids after which to cleave
+  attr_accessor :cut
+  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
+  attr_accessor :no_cut
+  # 'C' or 'N'
+  attr_accessor :sense
+  # Can pass in a name of an enzyme that is recognized (meaning there is a
+  # set_<name> method), or
+  #   trypsin
+  # For other enzymes, you must set :cut, :no_cut, :name, and :sense will
+  def initialize(arg={})
+    if arg.is_a?(String)
+      @name = arg
+      send("set_#{@name}".to_sym)
+    else
+      merge!(arg)
+    end
+  end
+  def set_trypsin
+    @sense = 'C'
+    @cut = 'KR'
+    @no_cut = 'P'
+  end
+  # if an xml builder object is given, it adds to the object and returns the
+  # builder object, otherwise it returns an xml fragment string
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    xmlb.sample_enzyme(:name => name) do |xmlb|
+      xmlb.specificity(:cut => cut, :no_cut => no_cut, :sense => sense)
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+  # returns self
+  def from_pepxml_node(node)
+    self.name = node['name']
+    ch = node.child
+    self.cut = ch['cut']
+    self.no_cut= ch['no_cut']
+    self.sense = ch['sense']
+    self
+  end
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
+  end
+  # takes an amino acid sequence (e.g. PEPTIDE).
+  # returns the number of missed cleavages
+  def num_missed_cleavages(aaseq)
+    seq_to_scan = '  ' + aaseq + '  '
+    raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
+    @num_missed_cleavages_regex =
+      if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
+      else
+        regex_string = "[#{@cut}]"
+        if @no_cut and @no_cut != ''
+          regex_string << "[^#{@no_cut}]"
+        end
+        /#{regex_string}/
+      end
+    arr = aaseq.scan(@num_missed_cleavages_regex)
+    num = arr.size
+    if aaseq[-1,1] =~ @num_missed_cleavages_regex
+      num -= 1
+    end
+    num
+  end
+  # No arguments should contain non-standard amino acids
+  def num_tol_term(prev_aa, middle, next_aa)
+    raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
+    no_cut = @no_cut || ''
+    num_tol = 0
+    last_of_middle = middle[-1,1]
+    first_of_middle = middle[0,1]
+    if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
+      num_tol += 1
+    end
+    if @cut.include?(last_of_middle) && !no_cut.include?(next_aa) || next_aa == '-'
+      num_tol += 1
+    end
+    num_tol
+  end
+end
+###################################################
+###################################################
+###################################################
+###################################################
+# This is digestion methodology:
+=begin
+  # returns all peptides of missed cleavages <= 'missed_cleavages'
+  # so 2 missed cleavages will return all no missed cleavage peptides
+  # all 1 missed cleavages and all 2 missed cleavages.
+  # options:
+  def digest(string, missed_cleavages=0, options={})
+    raise NotImplementedError if @sense == 'N'
+    s = StringScanner.new(string)
+    no_cut_regex = Regexp.new("[#{@no_cut}]")
+    regex = Regexp.new("[#{@cut}]")
+    peps = []
+    last_pos = 0
+    current_pep = ''
+    loop do
+      if s.eos?
+        break
+      end
+      m = s.scan_until(regex)
+      if m  ## found a cut point
+        last_pos = s.pos
+        # is the next amino acid a no_cut?
+        if string[s.pos,1] =~ no_cut_regex
+          current_pep << m
+        else
+          # cut it
+          current_pep << m
+          peps << current_pep
+          current_pep = ''
+        end
+      else  ## didn't find a cut point
+        current_pep << string[last_pos..-1]
+        peps << current_pep
+        break
+      end
+    end
+    ## LOOP through and grab each set of missed cleavages from num down to 0
+    all_sets_of_peps = []
+    (0..missed_cleavages).to_a.reverse.each do |num_mc|
+      all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
+    end
+    all_sets_of_peps
+  end
+  # takes an array of peptides and returns an array containing 'num' missed
+  # cleavages
+  # DOES NOT contain peptides that contain < num missed cleavages
+  # (i.e., will not return peptides with 1 or 2 missed cleavages if num == 3)
+  def get_missed_cleavages(ar_of_peptide_seqs, num)
+    (0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
+      ar_of_peptide_seqs[i,num+1].join
+    end
+  end
+  def self.tryptic(string, missed_cleavages=0)
+    self.new("trypsin").digest(string, missed_cleavages)
+  end
+end
+=end

data/lib/ms/ident/pepxml/search_database.rb ADDED Viewed

@@ -0,0 +1,49 @@
+require 'ms/fasta'
+require 'merge'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml
+  class SearchDatabase
+    include Merge
+    # required! the local, full path to the protein sequence database
+    attr_accessor :local_path
+    # required! 'AA' or 'NA'
+    attr_accessor :seq_type
+    # optional
+    attr_accessor :database_name
+    # optional
+    attr_accessor :orig_database_url
+    # optional
+    attr_accessor :database_release_date
+    # optional
+    attr_accessor :database_release_identifier
+    # optional
+    attr_accessor :size_of_residues
+    # takes a hash to fill in values
+    def initialize(hash={}, get_size_of_residues=false)
+      merge!(hash)
+      if get_size_of_residues && File.exist?(@local_path)
+        set_size_of_residues!
+      end
+    end
+    # returns self for chaining
+    def set_size_of_residues!
+      @size_of_residues = 0
+      MS::Fasta.foreach(@local_path) do |entry|
+        @size_of_residues += entry.sequence.size
+      end
+      self
+    end
+    def to_xml(builder)
+      attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
+      builder.search_database(Hash[attrs])
+      builder
+    end
+  end
+end

data/lib/ms/ident/pepxml/search_hit/modification_info.rb ADDED Viewed

@@ -0,0 +1,79 @@
+require 'andand'
+require 'nokogiri'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+class MS::Ident::Pepxml::SearchHit ; end
+# Positions and masses of modifications
+MS::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
+  ## Should be something like this:
+  # <modification_info mod_nterm_mass=" " mod_nterm_mass=" " modified_peptide=" ">
+  #   <mod_aminoacid_mass position=" " mass=" "/>
+  # </modification_info>
+  # e.g.:
+  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
+  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
+  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
+  # </modification_info>
+  # Mass of modified N terminus<
+  #attr_accessor :mod_nterm_mass
+  # Mass of modified C terminus<
+  #attr_accessor :mod_cterm_mass
+  # Peptide sequence (with indicated modifications)  I'm assuming that the
+  # native sequest indicators are OK here
+  #attr_accessor :modified_peptide
+  # These are objects of type: ...ModAminoacidMass
+  # position ranges from 1 to peptide length
+  #attr_accessor :mod_aminoacid_masses
+  def initialize(*args)
+    if args.first.is_a?(Hash)
+      args = args.first.values_at(*members)
+    end
+    super(*args)
+  end
+  # Will escape any xml special chars in modified_peptide
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    ## Collect the modifications:
+    ## Create the attribute string:
+    atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
+    atts.map! {|at| (v=send(at)) && [at, v] }.compact
+    xmlb.modification_info(Hash[atts]) do |xmlb|
+      mod_aminoacid_masses.andand.each do |mod_aa_mass|
+        mod_aa_mass.to_xml(xmlb)
+      end
+    end
+    builder || xmlb.doc.root.to_s
+  end
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
+  end
+  # returns self
+  def from_pepxml_node(node)
+    self[0] = node['modified_peptide']
+    self[2] = node['mod_nterm_mass']
+    self[3] = node['mod_cterm_mass']
+    _masses = []
+    node.children do |mass_n|
+      _masses << MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([mass_n['position'].to_i, mass_n['mass'].to_f])
+    end
+    self.mod_aminoacid_masses = _masses
+    self
+  end
+end
+MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
+  def to_xml(builder)
+    builder.mod_aminoacid_mass(:position => position, :mass => mass)
+    builder
+  end
+end

data/lib/ms/ident/pepxml/search_hit.rb ADDED Viewed

@@ -0,0 +1,144 @@
+require 'set'
+require 'merge'
+require 'nokogiri'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml
+  class MS::Ident::Pepxml::SearchHit
+    include Merge
+    DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]
+    Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])
+    class << self
+      attr_writer :members
+      def members
+        @members || DEFAULT_MEMBERS
+      end
+    end
+    members.each {|memb| attr_accessor memb }
+    # rank of the peptide hit (required)
+    attr_accessor :hit_rank
+    # Peptide aminoacid sequence (with no indicated modifications) (required)
+    attr_accessor :peptide
+    # Aminoacid preceding peptide ('-' if none)
+    attr_accessor :peptide_prev_aa
+    # Aminoacid following peptide (- if none)
+    attr_accessor :peptide_next_aa
+    # Number of peptide fragment ions found in spectrum (Integer)
+    attr_accessor :num_matched_ions
+    # Number of peptide fragment ions predicted for peptide (Integer)
+    attr_accessor :tot_num_ions
+    # (required)
+    attr_accessor :calc_neutral_pep_mass
+    # Mass(precursor ion) - Mass(peptide) (required)
+    attr_accessor :massdiff
+    # Number of peptide termini consistent with cleavage by sample enzyme
+    attr_accessor :num_tol_term
+    # Number of sample enzyme cleavage sites internal to peptide<
+    attr_accessor :num_missed_cleavages
+    # Potential use in future for user manual validation (true/false)
+    # by default, this will be set to false
+    # (the xml is expressed as a 0 or 1)
+    attr_accessor :is_rejected
+    # a protein identifier string (required)
+    attr_accessor :protein
+    # Number of unique proteins in search database containing peptide
+    # (required)
+    attr_accessor :num_tot_proteins
+    # Extracted from search database
+    attr_accessor :protein_desc
+    attr_accessor :calc_pI
+    attr_accessor :protein_mw
+    # a ModificationInfo object
+    attr_accessor :modification_info
+    # a Hash with keys (the score type) and values
+    # (to_xml calls each_pair to generate the xml, so a Struct would also
+    # work)
+    attr_accessor :search_scores
+    # a link back to the spectrum_query object
+    attr_accessor :spectrum_query
+    Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}
+    alias_method :aaseq, :peptide
+    alias_method :aaseq=, :peptide=
+    # takes either a hash or an ordered list of values to set.
+    # yeilds an empty search_scores hash if given a block.
+    # mind that you set the ModificationInfo object as needed.
+    def initialize(*args, &block)
+      @search_scores = {}
+      if args.first.is_a?(Hash)
+        merge!(args.first)
+      else
+        self.class.members.zip(args) do |k,v|
+          send("#{k}=", v)
+        end
+      end
+      block.call(@search_scores) if block
+    end
+    def members
+      self.class.members
+    end
+    def to_xml(builder=nil)
+      xmlb = builder || Nokogiri::XML::Builder.new
+      attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
+      hash_attrs = Hash[attrs]
+      hash_attrs[:massdiff] = hash_attrs[:massdiff].to_plus_minus_string
+      xmlb.search_hit(hash_attrs) do |xmlb|
+        @modification_info.to_xml(xmlb) if @modification_info
+        @search_scores.each_pair {|k,v| xmlb.search_score(:name => k, :value => v) }
+      end
+      builder || xmlb.doc.root.to_xml
+    end
+    def from_pepxml_node(node)
+      node.attributes
+      self[0] = node['hit_rank'].to_i
+      self[1] = node['peptide']
+      self[2] = node['peptide_prev_aa']
+      self[3] = node['peptide_next_aa']
+      self[4] = node['protein']  ## will this be the string?? (yes, for now)
+      self[5] = node['num_tot_proteins'].to_i
+      self[6] = node['num_matched_ions'].to_i
+      self[7] = node['tot_num_ions'].to_i
+      self[8] = node['calc_neutral_pep_mass'].to_f
+      self[9] = node['massdiff'].to_f
+      self[10] = node['num_tol_term'].to_i
+      self[11] = node['num_missed_cleavages'].to_i
+      self[12] = node['is_rejected'].to_i
+      self
+    end
+    Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
+  end
+end

data/lib/ms/ident/pepxml/search_result.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require 'nokogiri'
+require 'ms/ident/pepxml/search_hit'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+class MS::Ident::Pepxml::SearchResult
+  # an array of search_hits
+  attr_accessor :search_hits
+  # if block given, then yields an empty search_hits array.
+  # For consistency with other objects, will also take a hash that has the key
+  # :search_hits and the value an array.
+  def initialize(search_hits = [], &block)
+    @search_hits = search_hits
+    if search_hits.is_a?(Hash)
+      @search_hits = search_hits[:search_hits]
+    end
+    block.call(@search_hits) if block
+  end
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    builder.search_result do |xmlb|
+      search_hits.each do |sh|
+        sh.to_xml(xmlb)
+      end
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+end

data/lib/ms/ident/pepxml/search_summary.rb ADDED Viewed

@@ -0,0 +1,92 @@
+require 'ms/ident/pepxml/search_database'
+require 'ms/ident/pepxml/modifications'
+require 'ms/ident/pepxml/parameters'
+require 'nokogiri'
+require 'merge'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+# A plain Hash describing the enzymatic constraint applied to a search.
+# It is expected to carry these keys:
+#
+#    :enzyme => a valid enzyme name
+#    :max_num_internal_cleavages => max number of internal cleavages allowed
+#    :min_number_termini => minimum number of termini??
+class MS::Ident::Pepxml::EnzymaticSearchConstraint < Hash ; end
+class MS::Ident::Pepxml::SearchSummary
+  include Merge
+  DEFAULT_SEARCH_ID = '1'
+  attr_accessor :base_name
+  # required in v18-19, optional in later versions
+  attr_accessor :out_data_type
+  # required in v18-19, optional in later versions
+  attr_accessor :out_data
+  # by default, "1"
+  attr_accessor :search_id
+  # an array of MS::Ident::Pepxml::Modification objects
+  attr_accessor :modifications
+  # A SearchDatabase object (responds to :local_path and :type)
+  attr_accessor :search_database
+  # the other search paramaters as a hash
+  attr_accessor :parameters
+  # the search engine used, SEQUEST, Mascot, Comet, etc.
+  attr_accessor :search_engine
+  # required: 'average' or 'monoisotopic'
+  attr_accessor :precursor_mass_type
+  # required: 'average' or 'monoisotopic'
+  attr_accessor :fragment_mass_type
+  # An EnzymaticSearchConstraint object (at the moment this is merely a hash
+  # with a few required keys
+  attr_accessor :enzymatic_search_constraint
+  def block_arg
+    [@search_database = MS::Ident::Pepxml::SearchDatabase.new,
+      @enzymatic_search_constraint = MS::Ident::Pepxml::EnzymaticSearchConstraint.new,
+      @modifications,
+      @parameters = MS::Ident::Pepxml::Parameters.new,
+    ]
+  end
+  # initializes modifications to an empty array
+  def initialize(hash={}, &block)
+    @modifications = []
+    @search_id = DEFAULT_SEARCH_ID
+    merge!(hash, &block)
+  end
+  def to_xml(builder=nil)
+    # TODO: out_data and out_data_type are optional in later pepxml versions...
+    # should work that in...
+    attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
+    hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
+    xmlb = builder || Nokogiri::XML::Builder.new
+    builder.search_summary(hash) do |xmlb|
+      search_database.to_xml(xmlb)
+      xmlb.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
+      modifications.each do |mod|
+        mod.to_xml(xmlb)
+      end
+      parameters.to_xml(xmlb) if parameters
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
+  end
+  def from_pepxml_node(node)
+    raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
+  end
+end

data/lib/ms/ident/pepxml/spectrum_query.rb ADDED Viewed

@@ -0,0 +1,85 @@
+require 'nokogiri'
+require 'ms/mass'
+require 'merge'
+require 'ms/ident/pepxml/search_result'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+# search_specification is a search constraint applied specifically to this query (a String)
+class MS::Ident::Pepxml::SpectrumQuery
+  include Merge
+  DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]
+  class << self
+    attr_writer :members
+    def members
+      @members || DEFAULT_MEMBERS
+    end
+  end
+  members.each {|memb| attr_accessor memb }
+  Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
+  Optional = [:retention_time_sec, :search_specification]
+  # takes either a hash or an ordered list of values to set
+  # yeilds an empty search_results array if given a block
+  def initialize(*args, &block)
+    @search_results = []
+    if args.first.is_a?(Hash)
+      merge!(args.first)
+    else
+      self.class.members.zip(args) do |k,v|
+        send("#{k}=", v)
+      end
+    end
+    block.call(@search_results) if block
+  end
+  def members
+    self.class.members
+  end
+  ############################################################
+  # FOR PEPXML:
+  ############################################################
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    # all through search_specification
+    attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }
+    attrs_hash = Hash[attrs]
+    case pepxml_version
+    when 18
+      attrs_hash.delete(:retention_time_sec)
+    end
+    xmlb.spectrum_query(attrs_hash) do |xmlb|
+      search_results.each do |search_result|
+        search_result.to_xml(xmlb)
+      end
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
+  end
+  def from_pepxml_node(node)
+    @spectrum = node['spectrum']
+    @start_scan = node['start_scan'].to_i
+    @end_scan = node['end_scan'].to_i
+    @precursor_neutral_mass = node['precursor_neutral_mass'].to_f
+    @index = node['index'].to_i
+    @assumed_charge = node['assumed_charge'].to_i
+    self
+  end
+  def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=MS::Mass::H_PLUS)
+    m_plus_h - h_plus + deltamass
+  end
+end