RubyGems - mspire - Versions diffs - 0.3.1 → 0.3.9 - Mend

mspire 0.3.1 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

data/Rakefile +2 -2
data/bin/bioworks_to_pepxml.rb +15 -3
data/bin/ms_to_lmat.rb +2 -1
data/bin/sqt_group.rb +26 -0
data/changelog.txt +36 -0
data/lib/ms/msrun.rb +3 -1
data/lib/ms/parser/mzdata/dom.rb +14 -14
data/lib/ms/scan.rb +3 -3
data/lib/mspire.rb +1 -1
data/lib/sample_enzyme.rb +39 -0
data/lib/spec_id.rb +18 -0
data/lib/spec_id/aa_freqs.rb +6 -9
data/lib/spec_id/digestor.rb +16 -17
data/lib/spec_id/mass.rb +63 -1
data/lib/spec_id/parser/proph.rb +101 -2
data/lib/spec_id/precision/filter.rb +3 -2
data/lib/spec_id/precision/filter/cmdline.rb +3 -1
data/lib/spec_id/precision/filter/output.rb +1 -0
data/lib/spec_id/precision/prob.rb +88 -21
data/lib/spec_id/precision/prob/cmdline.rb +28 -16
data/lib/spec_id/precision/prob/output.rb +8 -2
data/lib/spec_id/proph/pep_summary.rb +25 -12
data/lib/spec_id/sequest.rb +28 -0
data/lib/spec_id/sequest/pepxml.rb +142 -197
data/lib/spec_id/sqt.rb +349 -0
data/lib/spec_id/srf.rb +33 -23
data/lib/validator.rb +40 -57
data/lib/validator/aa.rb +3 -90
data/lib/validator/aa_est.rb +112 -0
data/lib/validator/cmdline.rb +163 -31
data/lib/validator/decoy.rb +15 -7
data/lib/validator/digestion_based.rb +5 -4
data/lib/validator/q_value.rb +32 -0
data/script/peps_per_bin.rb +67 -0
data/script/sqt_to_meta.rb +24 -0
data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
data/specs/bin/fasta_shaker_spec.rb +2 -2
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
data/specs/bin/filter_and_validate_spec.rb +25 -6
data/specs/bin/ms_to_lmat_spec.rb +2 -2
data/specs/bin/prob_validate_spec.rb +5 -3
data/specs/sample_enzyme_spec.rb +86 -1
data/specs/spec_helper.rb +11 -9
data/specs/spec_id/bioworks_spec.rb +2 -1
data/specs/spec_id/precision/filter_spec.rb +5 -5
data/specs/spec_id/precision/prob_spec.rb +0 -67
data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
data/specs/spec_id/protein_summary_spec.rb +4 -4
data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
data/specs/spec_id/sequest_spec.rb +38 -0
data/specs/spec_id/sqt_spec.rb +111 -3
data/specs/spec_id_spec.rb +2 -0
data/specs/transmem/phobius_spec.rb +3 -1
data/specs/transmem/toppred_spec.rb +1 -1
data/specs/validator/aa_est_spec.rb +66 -0
data/specs/validator/aa_spec.rb +1 -68
data/specs/validator/background_spec.rb +2 -0
data/specs/validator/bias_spec.rb +3 -27
data/specs/validator/decoy_spec.rb +2 -2
data/specs/validator/transmem_spec.rb +2 -1
data/test_files/small.sqt +87 -0
metadata +312 -293

data/lib/spec_id/precision/prob/output.rb CHANGED Viewed

@@ -1,4 +1,4 @@
+require 'yaml'
 require 'spec_id/precision/output'
 require 'table'
 require 'matrix'
@@ -12,12 +12,18 @@ class SpecID::Precision::Prob::Output
   # returns array of data arrays and parallel labels
   def to_cols_and_labels(answer_hash)
     col_labels = %w(count probability peptide)
+    col_labels[1] = 'q_values' if answer_hash.key?(:q_values)
     cols = []
     cols << answer_hash[:count]
-    cols << answer_hash[:probabilities]
+    if answer_hash.key?(:q_values)
+      cols << answer_hash[:q_values]
+    else
+      cols << answer_hash[:probabilities]
+    end
     cols << answer_hash[:aaseqs]
     # if there is a single modified peptide, we'll include the column
     if answer_hash.key?(:modified_peptides)
       cols << answer_hash[:modified_peptides]

data/lib/spec_id/proph/pep_summary.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 require 'array_class'
-puts "REQUIRING"
-puts( require 'spec_id/sequest/pepxml' )
+require 'spec_id/sequest/pepxml'
 require 'spec_id/parser/proph'
 module Sequest ; end
@@ -13,10 +12,12 @@ module SpecID ; end
 module SpecID::Prot ; end
 module SpecID::Pep ; end
 module Proph
-  class PepSummary < Sequest::PepXML::MSMSRunSummary
-    # MSMSRunSummary is a SpecID object!
+  class PepSummary
+    include SpecID
     Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /
@@ -25,7 +26,7 @@ module Proph
     # the protein groups
     # currently these are just xml nodes returned!
     attr_accessor :peptideprophet_summary
-    attr_accessor :spectrum_queries
+    attr_accessor :msms_run_summaries
     attr_accessor :version
     def hi_prob_best ; true end
@@ -51,24 +52,26 @@ module Proph
     end
     def initialize(file=nil)
-      @prots = nil
       if file
         @version = get_version(file)
-        #@prot_groups = ProtSummary::Parser.new.parse_file(file)
-        SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
+        spec_id = SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
       end
     end
   end
+  # this is a SpecID::Pep (by interface: not including stuff yet)
   class PepSummary::Pep < Sequest::PepXML::SearchHit
-    %w(probability fval ntt nmc massd).each do |guy|
+    # aaseq is defined in SearchHit
+    %w(probability fval ntt nmc massd prots).each do |guy|
       self.add_member(guy)
     end
     # returns self
-    def from_pepxml_node(node, spec_query)
-      super(node, spec_query)
-      #pp_n = node.find_first('descendant::peptideprophet_result')
+    def from_pepxml_node(node)
+      super(node)
       an_res = node.find_first('child::analysis_result')
       pp_n = an_res.find_first('child::peptideprophet_result')
       self.probability = pp_n['probability'].to_f
@@ -87,6 +90,16 @@ module Proph
       self
     end
   end
+  ::Proph::PepSummary::Prot = ArrayClass.new(%w(name protein_descr peps))
+  class PepSummary::Prot
+    def first_entry ; self[0] end ## name
+    def reference ; self[0] + ' ' + self[1] end
+  end
 end

data/lib/spec_id/sequest.rb CHANGED Viewed

@@ -1,5 +1,33 @@
 require 'spec_id/sequest/params'
+require 'hash_by'
+require 'sort_by_attributes.rb'
 module Sequest
+  # returns one array of peptide hits:  indexes hits based on index_by, takes
+  # the uniq ones and then sorts the group by sort_by (compatible with
+  # sort_by_attributes) then slices from first_index to last_index
+  # (inclusive).
+  def self.other_hits(peps, first_index=1, last_index=9, index_by=[:base_name, :first_scan, :charge], sort_by=[:xcorr, {:down => :xcorr}])
+    all_hits = []
+    peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
+      if peps_per_scan.size >= (first_index + 1)
+        all_hits.push( *(peps_per_scan.uniq.sort_by_attributes(*sort_by)[first_index..last_index]) )
+      end
+    end
+    all_hits.compact
+  end
+  def self.other_hits_sorted_by_xcorr(peps, first_index, last_index, index_by=[:base_name, :first_scan, :charge])
+    all_hits = []
+    peps.hash_by(*index_by).each do |scan_key, peps_per_scan|
+      if peps_per_scan.size >= (first_index + 1)
+        all_hits.push( *(peps_per_scan.uniq.sort_by {|x| x.xcorr }.reverse[first_index..last_index]) )
+      end
+    end
+    all_hits.compact
+  end
 end

data/lib/spec_id/sequest/pepxml.rb CHANGED Viewed

@@ -155,25 +155,6 @@ class Sequest::PepXML::MSMSRunSummary
     @ms_detector = node['msDetector']
     @raw_data_type = node['raw_data_type']
     @raw_data = node['raw_data']
-    sample_enzyme_n = node.find_first("child::sample_enzyme")
-    @sample_enzyme = SampleEnzyme.from_pepxml_node(sample_enzyme_n)
-    search_summary_n = sample_enzyme_n.find_first("following-sibling::search_summary")
-    spectrum_queries = search_summary_n.find("following-sibling::spectrum_query")
-    @spectrum_queries = spectrum_queries.map do |sq_n|
-      Sequest::PepXML::SpectrumQuery.from_pepxml_node(sq_n, self)
-    end
-    ## NOTE: this is currently just the xml node!!!! TODO: wrap everything up
-    #into a better search summary object (to eventually depracate the params object)
-    @search_summary = node  ## in future call SearchSummary.from_pepxml_node
-    @peps = []
-    @spectrum_queries.each do |sq|
-      sq.search_results.each do |sr|
-        @peps.push( *(sr.search_hits) )
-      end
-    end
     self
   end
 end
@@ -353,7 +334,13 @@ Default_Options = {
     search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
     # create the sample enzyme from the params object:
-    opts[:sample_enzyme] = params.sample_enzyme
+    sample_enzyme_obj =
+      if opts[:sample_enzyme]
+        opts[:sample_enzyme]
+      else
+        params.sample_enzyme
+      end
+    opts[:sample_enzyme] = sample_enzyme_obj
     ## Create the pepxml obj and top level objects
     pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
@@ -390,36 +377,20 @@ Default_Options = {
     out_files = srf.out_files
     spectrum_queries_arr = Array.new(srf.dta_files.size)
     files_with_hits_index = 0  ## will end up being 1 indexed
-    srf.dta_files.each_with_index do |dta_file,i|
-      next if out_files[i].num_hits == 0
-      files_with_hits_index += 1
-      # We don't need to sort the hits by xcorr since it comes pre-sorted in
-      # srf files!
-      #arr = hits.sort_by{|v| v.xcorr }
-      # Get proper deltacn and deltacnstar
-      # under new srf, deltacn is already corrected for what prophet wants,
-      # deltacn_orig is how to access the old one
-      # Prophet deltacn is not the same as the native Sequest deltacn
-      # It is the deltacn of the second best hit!
-      hits = out_files[i].hits
-      top_hit = hits[0]
-      second_hit = hits[1]
-      deltacnstar =
-        if second_hit ; '0'
-        else ; '1'
-        end
-      ## mass calculations:
-      precursor_neutral_mass = dta_file.mh - h_plus
-      calc_neutral_pep_mass = top_hit[0] - h_plus
-      (start_scan, end_scan, charge) = srf_index[i]
+    deltacn_orig = opts[:deltacn_orig]
+    deltacn_index =
+      if deltacn_orig ; 20
+      else 19
+      end
+    srf.dta_files.each_with_index do |dta_file,dta_i|
+      next if out_files[dta_i].num_hits == 0
+      files_with_hits_index += 1
+      precursor_neutral_mass = dta_file.mh - h_plus
+      (start_scan, end_scan, charge) = srf_index[dta_i]
       sq_hash = {
         :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
         :start_scan => start_scan,
@@ -432,39 +403,70 @@ Default_Options = {
       spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
-      sequence = top_hit.sequence
-      #  NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
-      ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
-      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
-      # 0=mh 1=deltacn 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn
-      sh_hash = {
-        :hit_rank => 1,
-        :peptide => pepseq,
-        :peptide_prev_aa => prevaa,
-        :peptide_next_aa => nextaa,
-        :protein => top_hit[10].first.reference.split(" ").first,
-        :num_tot_proteins => top_hit[10].size,
-        :num_matched_ions => top_hit[7],
-        :tot_num_ions => top_hit[8],
-        :calc_neutral_pep_mass => calc_neutral_pep_mass,
-        :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
-        :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
-        :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
-        :is_rejected => 0,
-        # These are search score attributes:
-        :xcorr => top_hit[3],
-        :deltacn => top_hit[19],
-        :deltacnstar => deltacnstar,
-        :spscore => top_hit[2],
-        :sprank => top_hit[6],
-        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
-      }
-      search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
+      hits = out_files[dta_i].hits
+      search_hits =
+        if opts[:all_hits]
+          Array.new(out_files[dta_i].num_hits)  # all hits
+        else
+          Array.new(1)  # top hit only
+        end
+      (0...(search_hits.size)).each do |hit_i|
+        hit = hits[hit_i]
+        # under the modified deltacn schema (like bioworks)
+        # Get proper deltacn and deltacnstar
+        # under new srf, deltacn is already corrected for what prophet wants,
+        # deltacn_orig_updated is how to access the old one
+        # Prophet deltacn is not the same as the native Sequest deltacn
+        # It is the deltacn of the second best hit!
+        ## mass calculations:
+        calc_neutral_pep_mass = hit[0] - h_plus
+        sequence = hit.sequence
+        #  NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
+        ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
+        (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
+        # 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated
+        sh_hash = {
+          :hit_rank => hit_i+1,
+          :peptide => pepseq,
+          :peptide_prev_aa => prevaa,
+          :peptide_next_aa => nextaa,
+          :protein => hit[10].first.reference.split(" ").first,
+          :num_tot_proteins => hit[10].size,
+          :num_matched_ions => hit[7],
+          :tot_num_ions => hit[8],
+          :calc_neutral_pep_mass => calc_neutral_pep_mass,
+          :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
+          :num_tol_term => sample_enzyme_obj.num_tol_term(sequence),
+          :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
+          :is_rejected => 0,
+          # These are search score attributes:
+          :xcorr => hit[3],
+          :deltacn => hit[deltacn_index],
+          :spscore => hit[2],
+          :sprank => hit[6],
+          :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
+        }
+        unless deltacn_orig
+          sh_hash[:deltacnstar] =
+            if hits[hit_i+1].nil?  # no next hit? then its deltacnstar == 1
+            '1'
+            else
+            '0'
+            end
+        end
+        search_hits[hit_i] = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
+      end
       search_result = Sequest::PepXML::SearchResult.new
-      search_result.search_hits = [search_hit]
+      search_result.search_hits = search_hits
       spectrum_query.search_results = [search_result]
       spectrum_queries_arr[files_with_hits_index] = spectrum_query
     end
@@ -473,56 +475,61 @@ Default_Options = {
     pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
     pepxml_obj.base_name = pipeline.msms_run_summary.base_name
     pipeline.msms_run_summary.spectrum_queries =  spectrum_queries_arr
     pepxml_obj
   end
   # takes an .srg or bioworks.xml file
   # if possible, ensures that an mzXML file is present for each pepxml file
   # :print => true, will print files
+  # NOTES: num_tol_term and num_missed_cleavages are both calculated from the
+  # sample_enzyme.  Thus, a No_Enzyme search may still pass in a
+  # :sample_enzyme option to get these calculated.
   def self.set_from_bioworks(bioworks_file, opts={})
     opts = Default_Options.merge(opts)
     ## Create the out_path directory if necessary
-    unless File.exist? opts[:out_path]
-      FileUtils.mkpath(opts[:out_path])
-    end
-    unless File.directory? opts[:out_path]
-      abort "#{opts[:out_path]} must be a directory!"
-    end
-    spec_id = SpecID.new(bioworks_file)
-    pepxml_objs =
-    if spec_id.is_a? Bioworks
-      abort("must have opts[:params] set!") unless opts[:params]
-      set_from_bioworks_xml(bioworks_file, opts[:params], opts)
-    elsif spec_id.is_a? SRFGroup
-      spec_id.srfs.map do |srf|
-        new_from_srf(srf, opts)
+      unless File.exist? opts[:out_path]
+        FileUtils.mkpath(opts[:out_path])
       end
-    else
-      abort "invalid object"
-    end
-    if opts[:print]
-      pepxml_objs.each do |obj|
-        obj.to_pepxml(obj.base_name + ".xml")
+      unless File.directory? opts[:out_path]
+        abort "#{opts[:out_path]} must be a directory!"
+      end
+      spec_id = SpecID.new(bioworks_file)
+      pepxml_objs =
+        if spec_id.is_a? Bioworks
+          abort("must have opts[:params] set!") unless opts[:params]
+          set_from_bioworks_xml(bioworks_file, opts[:params], opts)
+        elsif spec_id.is_a? SRFGroup
+          spec_id.srfs.map do |srf|
+            new_from_srf(srf, opts)
+          end
+        else
+          abort "invalid object"
+        end
+      if opts[:print]
+        pepxml_objs.each do |obj|
+          obj.to_pepxml(obj.base_name + ".xml")
+        end
       end
+      pepxml_objs
     end
-    pepxml_objs
-  end
-  # Takes bioworks 3.2/3.3 xml output (with no filters)
-  # Returns a list of PepXML objects
-  # params = sequest.params file
-  # bioworks = bioworks.xml exported multi-consensus view file
-  # pepxml_version = 0 for tpp 1.2.3
-  # pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
+    # Takes bioworks 3.2/3.3 xml output (with no filters)
+    # Returns a list of PepXML objects
+    # params = sequest.params file
+    # bioworks = bioworks.xml exported multi-consensus view file
+    # pepxml_version = 0 for tpp 1.2.3
+    # pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
   def self.set_from_bioworks_xml(bioworks, params, opts={})
     opts = Default_Options.merge(opts)
     pepxml_version, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)
     unless out_path
       out_path = '.'
     end
@@ -545,6 +552,13 @@ Default_Options = {
     else                           ; abort "Don't recognize #{bioworks} as object or string!"
     end
+    sample_enzyme_obj =
+      if opts[:sample_enzyme]
+        opts[:sample_enzyme]
+      else
+        params.sample_enzyme
+      end
     #puts "bioworks.peps.size: #{bioworks.peps.size}"; #puts "bioworks.prots.size: #{bioworks.prots.size}"; #puts "Bioworks.version: #{bioworks.version}"
     ## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
@@ -589,7 +603,7 @@ Default_Options = {
           :ms_detector => ms_detector,
           :raw_data_type => raw_data_type,
           :raw_data => raw_data,
-          :sample_enzyme => params.sample_enzyme,
+          :sample_enzyme => sample_enzyme_obj, # usually, params.sample_enzyme,
           :search_summary => search_summary,
         })
         pipeline.msms_run_summary = msms_run_summary
@@ -626,10 +640,11 @@ Default_Options = {
       end
-      spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
+      spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).map do |key,arr|
         # Sort_by_rank and take the top hit (to mimic out2summary):
         arr = arr.sort_by {|pep| pep.xcorr.to_f } # ascending
         top_pep = arr.pop
         second_hit = arr.last # needed for deltacnstar
@@ -643,7 +658,7 @@ Default_Options = {
         end
         calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
         # deltacn & star:
         # (NOTE: OLD?? out2summary wants the deltacn of the 2nd best hit.)
         if second_hit
@@ -685,8 +700,8 @@ Default_Options = {
           :tot_num_ions => tot_num_ions,
           :calc_neutral_pep_mass => calc_neutral_pep_mass,
           :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
-          :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence),
-          :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence),
+          :num_tol_term => sample_enzyme_obj.num_tol_term(top_pep.sequence),
+          :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
           :is_rejected => 0,
           # These are search score attributes:
           :xcorr => top_pep.xcorr,
@@ -697,7 +712,7 @@ Default_Options = {
           :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
           :spectrum_query => spec_query,
         })
-        search_result.search_hits = [search_hit]   # there can be multiple search hits
+        search_result.search_hits = [search_hit] # there can be multiple search hits
         spec_query.search_results = [search_result]  # can be multiple search_results
         spec_query
       end
@@ -766,9 +781,8 @@ class Sequest::PepXML::SearchResult
   attr_accessor :search_hits
   # if block given, then search_hits set to return value
-  def initialize
-    if block_given? ; @search_hits = yield
-    else ; @search_hits = [] end
+  def initialize(search_hits = [])
+    @search_hits = search_hits
   end
   def to_pepxml
@@ -777,17 +791,6 @@ class Sequest::PepXML::SearchResult
     end
   end
-  def self.from_pepxml_node(node, spec_query)
-    self.new.from_pepxml_node(node, spec_query)
-  end
-  def from_pepxml_node(node, spec_query, msmsrun_obj)
-    sh_klass = msmsrun_obj.search_hit_class
-    @search_hits = node.children.map do |sh_n|
-      sh_klass.from_pepxml_node(sh_n, spec_query)
-    end
-    self
-  end
 end
 class Sequest::PepXML::SearchSummary
@@ -820,7 +823,11 @@ class Sequest::PepXML::SearchSummary
   def to_pepxml
     element_xml(:search_summary, [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]) do
       search_database.to_pepxml +
-        short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini]) +
+        if @params.enzyme =~ /^No_Enzyme/
+          ''
+        else
+          short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini])
+        end +
         @modifications.to_pepxml +
         Sequest::PepXML::Parameters.new(@params).to_pepxml
     end
@@ -1216,21 +1223,17 @@ class Sequest::PepXML::SpectrumQuery
     end
   end
-  def self.from_pepxml_node(node, msmsrun_obj)
-    self.new.from_pepxml_node(node, msmsrun_obj)
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
   end
-  def from_pepxml_node(node, msmsrun_obj)
+  def from_pepxml_node(node)
     self[0] = node['spectrum']
     self[1] = node['start_scan'].to_i
     self[2] = node['end_scan'].to_i
     self[3] = node['precursor_neutral_mass'].to_f
     self[4] = node['index'].to_i
     self[5] = node['assumed_charge'].to_i
-    self[6] = node.children.map do |v|
-      sh = Sequest::PepXML::SearchResult.new
-      sh.from_pepxml_node(v, self, msmsrun_obj)
-    end
     self
   end
@@ -1299,6 +1302,8 @@ class Sequest::PepXML::SearchHit
   Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
+  def aaseq ; self[1] end
+  def aaseq=(arg) ; self[1] = arg end
   # These are all search_score elements:
@@ -1321,44 +1326,6 @@ class Sequest::PepXML::SearchHit
     "#<SearchHit #{var}>"
   end
-  # requires Params object and full sequence (with heads and tails)
-  def self.calc_num_missed_cleavages(params, sequence)
-    num_missed = 0
-    offset, split_after, except_before = params.enzyme_specificity
-    first, middle, last = SpecID::Pep.split_sequence(sequence)
-    to_regexp = "[#{split_after}]"
-    if except_before.size > 0
-      to_regexp << "[^#{except_before}]"
-    end
-    regexp = /#{to_regexp}/
-    arr = middle.scan(regexp)
-    num = arr.size
-    if middle[-1,1] =~ regexp
-      # if the regexp is a single letter (exceptions) and the last letter
-      # matches, then it will count when it is not a missed cleavage
-      # we can correct for this and get the right answer
-      num -= 1
-    else
-      num
-    end
-  end
-  # requires Params object and full sequence (with heads and tails)
-  def self.calc_num_tol_term(params, sequence)
-    num_tol = 0
-    offset, split_after, except_before = params.enzyme_specificity
-    first, middle, last = SpecID::Pep.split_sequence(sequence)
-    last_of_middle = middle[-1,1]
-    first_of_middle = middle[0,1]
-    if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
-      num_tol += 1
-    end
-    if split_after.include?(last_of_middle) && !except_before.include?(last) || last == '-'
-      num_tol += 1
-    end
-    num_tol
-  end
   # Takes ions in the form XX/YY and returns [XX.to_i, YY.to_i]
   def self.split_ions(ions)
     ions.split("/").map {|ion| ion.to_i }
@@ -1392,11 +1359,7 @@ class Sequest::PepXML::SearchHit
     end
   end
-  def self.from_pepxml_node(node, spec_query)
-    self.new.from_pepxml_node(node, spec_query)
-  end
-  def from_pepxml_node(node, spec_query)
+  def from_pepxml_node(node)
     self[0] = node['hit_rank'].to_i
     self[1] = node['peptide']
     self[2] = node['peptide_prev_aa']
@@ -1410,24 +1373,6 @@ class Sequest::PepXML::SearchHit
     self[10] = node['num_tol_term'].to_i
     self[11] = node['num_missed_cleavages'].to_i
     self[12] = node['is_rejected'].to_i
-    if modinfo_node = node.find_first("child::modification_info")
-      self[18] = Sequest::PepXML::SearchHit::ModificationInfo.from_pepxml_node(modinfo_node)
-    end
-    node.find("child::search_score").each do |ss_n|
-      case ss_n['name']
-      when 'deltacnstar'
-        self[13] = ss_n['value'].to_i
-      when 'xcorr'
-        self[14] = ss_n['value'].to_f
-      when 'deltacn'
-        self[15] = ss_n['value'].to_f
-      when 'spscore'
-        self[16] = ss_n['value'].to_f
-      when 'sprank'
-        self[17] = ss_n['value'].to_i
-      end
-    end
-    self[19] = spec_query
     self
   end