RubyGems - mspire - Versions diffs - 0.1.5 → 0.1.7 - Mend

mspire 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

data/Rakefile +5 -2
data/bin/bioworks_to_pepxml.rb +84 -40
data/bin/fasta_shaker.rb +100 -0
data/bin/filter_spec_id.rb +185 -23
data/bin/gi2annot.rb +2 -110
data/bin/id_class_anal.rb +31 -21
data/bin/id_precision.rb +12 -8
data/bin/{false_positive_rate.rb → precision.rb} +1 -1
data/bin/protein_summary.rb +55 -62
data/changelog.txt +34 -0
data/lib/align.rb +0 -1
data/lib/fasta.rb +88 -24
data/lib/gi.rb +114 -0
data/lib/roc.rb +64 -58
data/lib/spec_id/aa_freqs.rb +166 -0
data/lib/spec_id/bioworks.rb +5 -1
data/lib/spec_id/precision.rb +427 -0
data/lib/spec_id/proph.rb +2 -2
data/lib/spec_id/sequest.rb +810 -113
data/lib/spec_id/srf.rb +486 -0
data/lib/spec_id.rb +107 -23
data/release_notes.txt +11 -0
data/script/estimate_fpr_by_cysteine.rb +226 -0
data/script/filter-peps.rb +3 -3
data/script/find_cysteine_background.rb +137 -0
data/script/gen_database_searching.rb +11 -7
data/script/genuine_tps_and_probs.rb +136 -0
data/script/top_hit_per_scan.rb +5 -2
data/test/tc_aa_freqs.rb +59 -0
data/test/tc_bioworks.rb +6 -1
data/test/tc_bioworks_to_pepxml.rb +25 -18
data/test/tc_fasta.rb +81 -3
data/test/tc_fasta_shaker.rb +147 -0
data/test/tc_gi.rb +20 -0
data/test/tc_id_class_anal.rb +9 -12
data/test/tc_id_precision.rb +12 -11
data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
data/test/tc_protein_summary.rb +31 -22
data/test/tc_roc.rb +95 -50
data/test/tc_sequest.rb +212 -145
data/test/tc_spec.rb +10 -5
data/test/tc_spec_id.rb +0 -2
data/test/tc_spec_id_xml.rb +36 -0
data/test/tc_srf.rb +216 -0
metadata +35 -21
data/lib/spec_id/false_positive_rate.rb +0 -476
data/test/tc_gi2annot.rb +0 -12

data/lib/spec_id/sequest.rb CHANGED Viewed

@@ -6,6 +6,74 @@ require 'set_from_hash'
 require 'spec_id/bioworks'
 require 'instance_var_set_from_hash'
 require 'spec/msrun'
+require 'spec_id/srf'
+class Numeric
+  # returns a string with a + or - on the front
+  def to_plus_minus_string
+    if self >= 0
+      '+' << self.to_s
+    else
+      '-' << self.to_s
+    end
+  end
+end
+##########################################
+# NEED TO ADD MODIFICATIONS and generally verify pepxml creation!!! :
+# HERE's an excerpt from an example file from tpp 2.9.2 that I'm going to follow:
+=begin
+<search_summary base_name="/regis/data3/search/akeller/LCQ/COMET/LIGHT/haloICAT2_41" search_engine="COMET" precursor_mass_type="average" fragment_mass_type="average">
+<sequence_search_constraint sequence="C"/>
+<aminoacid_modification aminoacid="C" massdiff="8.049" mass="553.765" variable="Y" binary="N"/>
+<aminoacid_modification aminoacid="C" massdiff="442.5772" mass="545.7160" variable="N"/>
+<aminoacid_modification aminoacid="M" massdiff="16.0000" mass="147.1926" variable="Y" binary="N" symbol="1"/>
+<parameter name="peptide_mass_tol" value="3.0000"/>
+<parameter name="peptide_mass_tol_units" value="DA"/>
+<parameter name="num_output_lines" value="10"/>
+<parameter name="remove_precursor_peak" value="0"/>
+<parameter name="num_dup_headers" value="1"/>
+<parameter name="email_address" value=""/>
+<parameter name="ion_series" value="010000010"/>
+<parameter name="max_num_var_mod_residues" value="3"/>
+<parameter name="md5_check_sum" value="2547286a77a35abe2af3f2e9825ab814"/>
+</search_summary>
+=end
+# and a guy with modifications:
+=begin
+<search_result spectrum="haloICAT2_41.1110.1110.2" start_scan="1110" end_scan="1110" precursor_neutral_mass="2000.6641" assumed_charge="2" index="28">
+<search_hit hit_rank="1" peptide="GCMPSKEVLSAGAHR" peptide_prev_aa="R" peptide_next_aa="Y" protein="Chr_ORF0132" num_tot_proteins="1" num_matched_ions="19" tot_num_ions="30" calc_neutral_pep_mass="2001.3685" massdiff="-0.704" num_tol_term="2" num_missed_cleavages="1" is_rejected="0">
+<modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
+<mod_aminoacid_mass position="2" mass="545.7160"/>
+<mod_aminoacid_mass position="3" mass="147.1926"/>
+</modification_info>
+<search_score name="dotproduct" value="359"/>
+<search_score name="delta" value="0.296"/>
+<search_score name="deltastar" value="0"/>
+<search_score name="zscore" value="5.290"/>
+<search_score name="expect" value="0.000E+00"/>
+<peptideprophet_result probability="0.9994" all_ntt_prob="(0.3713,0.4360,0.9994)">
+<search_score_summary>
+<parameter name="fval" value="3.4002"/>
+<parameter name="ntt" value="2"/>
+<parameter name="nmc" value="1"/>
+<parameter name="massd" value="-0.704"/>
+</search_score_summary>
+</peptideprophet_result>
+=end
+# sequest.params option:
+# diff_search_options = 15.994910 M 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y
+# permanent mods are at the bottom: ...
+# add_A_Alanine = 0.0000                   ; added to A
+# add_S_Serine = 0.0000                    ; added to S
+# add_P_Proline = 0.0000                   ; added to P
+# add_V_Valine = 0.0000                    ; added to V
+# add_T_Threonine = 0.0000                 ; added to T
+# ...
 module SpecID::Sequest; end
 class SpecID::Sequest::PepXML; end
@@ -26,8 +94,12 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
     @xmlns = nil
     @xmlns_xsi = nil
     @xsi_schema_location = nil
-    self.set_from_hash(hash)
-    @msms_run_summary = yield
+    if hash
+      self.set_from_hash(hash)
+    end
+    if block_given?
+      @msms_run_summary = yield
+    end
   end
   # if no date string given, then it will set to Time.now
@@ -80,7 +152,8 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
   include SpecIDXML
   # the version of TPP you are using (determines xml output)
-  # The name of the pep xml file (without extension)
+  # The name of the pep xml file (without extension) (but this is a long
+  # filename!!!)
   attr_accessor :base_name
   # The name of the mass spec manufacturer
   attr_accessor :ms_manufacturer
@@ -104,7 +177,9 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
   # set to the return value of the block
   def initialize(hash=nil)
     @spectrum_queries = []
-    instance_var_set_from_hash(hash)
+    if hash
+      instance_var_set_from_hash(hash)
+    end
     if block_given? ; @spectrum_queries = yield end
   end
@@ -137,6 +212,8 @@ end
 class SpecID::Sequest::PepXML
   include SpecIDXML
+  ## CREATE a default version for the entire class
   class << self
     attr_accessor :pepxml_version
   end
@@ -144,7 +221,11 @@ class SpecID::Sequest::PepXML
   self.pepxml_version = DEF_VERSION # default version
   attr_accessor :pepxml_version, :msms_pipeline_analysis
+  ## the full path name (no extension)
   attr_accessor :base_name
+  attr_accessor :h_plus
+  attr_accessor :avg_parent
   #attr_accessor :spectrum_queries, :params, :base_name, :search_engine, :database, :raw_data_type, :raw_data, :out_data_type, :out_data, :sample_enzyme, :pepxml_version
   # returns an array of spectrum queries
@@ -153,10 +234,29 @@ class SpecID::Sequest::PepXML
   end
   # msms_pipeline_analysis is set to the result of the yielded block
-  def initialize(pepxml_version=DEF_VERSION)
+  # and set_mono_or_avg is called with params if given
+  def initialize(pepxml_version=DEF_VERSION, sequest_params_obj=nil)
     self.class.pepxml_version = pepxml_version
-    @msms_pipeline_analysis = yield
-    @base_name = @msms_pipeline_analysis.msms_run_summary.base_name
+    if sequest_params_obj
+      set_mono_or_avg(sequest_params_obj)
+    end
+    if block_given?
+      @msms_pipeline_analysis = yield
+      @base_name = @msms_pipeline_analysis.msms_run_summary.base_name
+    end
+  end
+  # sets @h_plus and @avg_parent from the sequest params object
+  def set_mono_or_avg(sequest_params_obj)
+    case sequest_params_obj.precursor_mass_type
+    when "monoisotopic" ; @avg_parent = false
+    else ; @avg_parent = true
+    end
+    case @avg_parent
+    when true ; @h_plus = SpecID::AVG[:h_plus]
+    when false ; @h_plus = SpecID::MONO[:h_plus]
+    end
   end
   def date
@@ -203,6 +303,190 @@ class SpecID::Sequest::PepXML
     end
   end
+  # Default options for new_from_srf; any key may be overridden through the
+  # opts hash given to that method (the two hashes are merged).
+  Default_Options = {
+    # nil => new_from_srf falls back to the directory of the srf file
+    :out_path => nil,
+    # used by new_from_srf as the database path when the database named in
+    # the params does not exist on this machine
+    :backup_db_path => '/project/marcotte/marcotte/ms/database',
+    # a PepXML option
+    :pepxml_version => DEF_VERSION,
+    ## MSMSRunSummary options:
+    # string must be recognized in sample_enzyme.rb
+    # or create your own SampleEnzyme object
+    :sample_enzyme => 'trypsin',
+    :ms_manufacturer => 'ThermoFinnigan',
+    # NOTE: new_from_srf always replaces :ms_model with the model in the srf
+    # header (and may replace :ms_mass_analyzer accordingly)
+    :ms_model => 'LCQ Deca XP',
+    :ms_ionization => 'ESI',
+    :ms_mass_analyzer => 'Ion Trap',
+    :ms_detector => 'UNKNOWN',
+    :raw_data_type => "raw",
+    :raw_data => ".mzXML", ## even if you don't have it?
+    ## SearchSummary options:
+    :out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
+    :out_data => ".tgz" ## may be srf??
+  }
+  # will dynamically set :ms_model and :ms_mass_analyzer from srf info
+  # (ignoring defaults or anything passed in) for LTQ Orbitrap
+  # and LCQ Deca XP
+  # See SRF::Sequest::PepXML::Default_Options hash for defaults
+  # unless given, the out_path will be given as the path of the srf_file
+  def self.new_from_srf(srf_file, opts={})
+    opts = Default_Options.merge(opts)
+    ## set the outpath
+    out_path = opts.delete(:out_path)
+    unless out_path
+      out_path = File.dirname(srf_file)
+    end
+    ## read the srf file
+    srf = SRF.new(srf_file)
+    params = srf.params
+    ## check to see if we need backup_db
+    backup_db_path = opts.delete(:backup_db_path)
+    unless File.exist? params.database
+      params.database_path = backup_db_path
+    end
+    #######################################################################
+    # PREPARE THE OPTIONS:
+    #######################################################################
+    ## remove items from the options hash that don't belong to
+    ppxml_version = opts.delete(:pepxml_version)
+    out_data_type = opts.delete(:out_data_type)
+    out_data = opts.delete(:out_data)
+    ## Extract meta info from srf
+    bn_noext = base_name_noext(srf.header.raw_filename)
+    opts[:ms_model] = srf.header.model
+    case opts[:ms_model]
+    when /Orbitrap/
+      opts[:ms_mass_analyzer] = 'Orbitrap'
+    when /LCQ Deca XP/
+      opts[:ms_mass_analyzer] = 'Ion Trap'
+    end
+    ## Create the base name
+    full_base_name_no_ext = make_base_name( File.expand_path(out_path), bn_noext)
+    opts[:base_name] = full_base_name_no_ext
+    ## Create the search summary:
+    search_summary_options = {
+      :search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params),
+      :base_name => full_base_name_no_ext,
+      :out_data_type => out_data_type,
+      :out_data => out_data
+    }
+    opts[:search_summary] = SpecID::Sequest::PepXML::SearchSummary.new( params, search_summary_options)
+    ## Create the SampleEnzyme object if necessary
+    unless opts[:sample_enzyme].is_a? SampleEnzyme
+      opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
+    end
+    ## Create the pepxml obj
+    pepxml_obj = SpecID::Sequest::PepXML.new(ppxml_version, params)
+    ## name some common variables we'll need
+    h_plus = pepxml_obj.h_plus
+    avg_parent = pepxml_obj.avg_parent
+    #######################################################################
+    # CREATE the spectrum_queries_ar
+    #######################################################################
+    srf_index = srf.index
+    out_files = srf.out_files
+    spectrum_queries_arr = Array.new(srf.dta_files.size)
+    files_with_hits_index = 0  ## will end up being 1 indexed
+    srf.dta_files.each_with_index do |dta_file,i|
+      next if out_files[i].num_hits == 0
+      files_with_hits_index += 1
+      # Sort the hits
+      hits = out_files[i].hits
+      arr = hits.sort_by{|v| v.xcorr }
+      # Get proper deltacn and deltacnstar
+      # Prophet deltacn is not the same as the native Sequest deltacn
+      # It is the deltacn of the second best hit!
+      top_hit = arr.pop
+      second_hit = arr.last
+      if second_hit
+        top_hit[1] = second_hit[1]
+        deltacnstar = '0'
+      else
+        top_hit[1] = '1.0'
+        deltacnstar = '1'
+      end
+      ## mass calculations:
+      precursor_neutral_mass = dta_file.mh - h_plus
+      calc_neutral_pep_mass = top_hit[0] - h_plus
+      massdiff = precursor_neutral_mass - calc_neutral_pep_mass
+      if massdiff >= 0 ; massdiff = "+" + massdiff.to_s
+      else ; massdiff = massdiff.to_s end
+      (start_scan, end_scan, charge) = srf_index[i]
+      sq_hash = {
+        :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
+        :start_scan => start_scan,
+        :end_scan => end_scan,
+        :precursor_neutral_mass => precursor_neutral_mass,
+        :assumed_charge => charge,
+        :pepxml_version => ppxml_version,
+        :index => files_with_hits_index,
+      }
+      #  NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
+      ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
+      (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_hit[8])
+      #  ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
+      sh_hash = {
+        :hit_rank => "1",
+        :peptide => pepseq,
+        :peptide_prev_aa => prevaa,
+        :peptide_next_aa => nextaa,
+        :protein => top_hit[9].split(" ").first,
+        :num_tot_proteins => top_hit[10],
+        :num_matched_ions => top_hit[6],
+        :tot_num_ions => top_hit[7],
+        :calc_neutral_pep_mass => calc_neutral_pep_mass,
+        :massdiff => massdiff,
+        :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_hit[8]),
+        :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_hit[8]),
+        :is_rejected => '0',
+        # These are search score attributes:
+        :xcorr => top_hit[3],
+        :deltacn => top_hit[1],
+        :deltacnstar => deltacnstar,
+        :spscore => top_hit[2],
+        :sprank => top_hit[5],
+      }
+      spectrum_queries_arr[files_with_hits_index] = SpecID::Sequest::PepXML::SpectrumQuery.new(sq_hash) do
+        search_result = SpecID::Sequest::PepXML::SearchResult.new do
+          [ SpecID::Sequest::PepXML::SearchHit.new(sh_hash) ] # there can be multiple hits
+        end # SearchResult
+        [search_result] # can be multiple
+      end
+    end
+    spectrum_queries_arr.compact!
+    #######################################################################
+    # ADD the pipeline analysis
+    #######################################################################
+    pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'}) do
+      SpecID::Sequest::PepXML::MSMSRunSummary.new(opts) { spectrum_queries_arr }
+    end
+    pepxml_obj.msms_pipeline_analysis = pipeline
+    pepxml_obj.base_name = pipeline.msms_run_summary.base_name
+    pepxml_obj
+  end
+  # Takes bioworks 3.2/3.3 xml output (with no filters)
   # Returns a list of PepXML objects
   # msdata = path to mzXML files (or .timeIndex files) (or @TODO: path to sqt file(s))
   # params = sequest.params file
@@ -246,6 +530,7 @@ class SpecID::Sequest::PepXML
     ## Create a hash of spectrum_query arrays by filename (this very big block):
     spectrum_queries_by_base_name = {}
+    pepxml_objs_by_base_name = {}
     # Hash by the filenames to split into filenames:
     bioworks.peps.hash_by(:base_name).each do |base_name, pep_arr|
@@ -262,7 +547,10 @@ class SpecID::Sequest::PepXML
         abort "invalid BioworksBrowser version: #{x}"
       end
-      spectrum_queries = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
+      pepxml_obj = SpecID::Sequest::PepXML.new(pepxml_version, params)
+      pepxml_objs_by_base_name[base_name] = pepxml_obj
+      spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
         # Sort_by_rank and take the top hit (to mimic out2summary):
@@ -270,28 +558,18 @@ class SpecID::Sequest::PepXML
         top_pep = arr.pop
         second_hit = arr.last # needed for deltacnstar
-        case params.precursor_mass_type
-        when "monoisotopic" ; avg_parent = false
-        else ; avg_parent = true
-        end
-        case avg_parent
-        when true ; h_plus = SpecID::AVG[:h_plus]
-        when false ; h_plus = SpecID::MONO[:h_plus]
-        end
         case calc_prec_by
         when :prec_mz_arr
-          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, avg_parent)
+          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
         when :deltamass
-          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, avg_parent)
+          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
         end
-        calc_neutral_pep_mass = (top_pep.mass.to_f - h_plus)
+        calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
         massdiff = precursor_neutral_mass - calc_neutral_pep_mass
         if massdiff >= 0 ; massdiff = "+" + massdiff.to_s
-        else ; massdiff = massdiff.to_s end
+        else ; massdiff = massdiff.to_s end #already has a -
         # deltacn & star:
         # (NOTE: OLD?? out2summary wants the deltacn of the 2nd best hit.)
         if second_hit
@@ -317,7 +595,7 @@ class SpecID::Sequest::PepXML
             # NOTE: the bioworks mass is really M+H if two or more scans went
             # into the search_hit; calc_neutral_pep_mass is simply the avg of
             # precursor masses adjusted to be neutral
-            (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.split_sequence(top_pep.sequence)
+            (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_pep.sequence)
             (num_matched_ions, tot_num_ions) = SpecID::Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
             search_hit = SpecID::Sequest::PepXML::SearchHit.new({
               :hit_rank => "1",
@@ -348,32 +626,36 @@ class SpecID::Sequest::PepXML
       # create an index by spectrum as results end up typically in out2summary
       # (I really dislike this order, however)
-      spectrum_queries = spectrum_queries.sort_by {|pep| pep.spectrum }
-      spectrum_queries.each_with_index {|res,index| res.index = "#{index + 1}" }
+      spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
+      spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
-      spectrum_queries_by_base_name[base_name] = spectrum_queries
+      spectrum_queries_by_base_name[base_name] = spectrum_queries_ar
     end
-    spectrum_queries_by_base_name.collect do |base_name, spectrum_queries|
+    modifications_string = bioworks.modifications
+    spectrum_queries_by_base_name.collect do |base_name, spectrum_queries_ar|
       case pepxml_version
       when 18
-        SpecID::Sequest::PepXML.new(pepxml_version) do
-          SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
-            full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
-            SpecID::Sequest::PepXML::MSMSRunSummary.new({
-              :base_name => full_base_name_no_ext,
-              :ms_manufacturer => ms_manufacturer,
-              :ms_model => ms_model,
-              :ms_ionization => ms_ionization,
-              :ms_mass_analyzer => ms_mass_analyzer,
-              :ms_detector => ms_detector,
-              :raw_data_type => raw_data_type,
-              :raw_data => raw_data,
-              :sample_enzyme => SampleEnzyme.new(sample_enzyme),
-              :search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
-            }) do spectrum_queries end
-          end
+        pipeline =  SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
+          full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
+          SpecID::Sequest::PepXML::MSMSRunSummary.new({
+            :base_name => full_base_name_no_ext,
+            :ms_manufacturer => ms_manufacturer,
+            :ms_model => ms_model,
+            :ms_ionization => ms_ionization,
+            :ms_mass_analyzer => ms_mass_analyzer,
+            :ms_detector => ms_detector,
+            :raw_data_type => raw_data_type,
+            :raw_data => raw_data,
+            :sample_enzyme => SampleEnzyme.new(sample_enzyme),
+            :search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
+          }) { spectrum_queries_ar }
         end
+        pepxml_obj = pepxml_objs_by_base_name[base_name]
+        pepxml_obj.msms_pipeline_analysis = pipeline
+        pepxml_obj.base_name = pipeline.msms_run_summary.base_name
+        pepxml_obj
       when 0
         ## @TODO: NEED TO REVAMP THIS:
         #        SpecID::Sequest::PepXML.new(pepxml_version).set_from_hash({
@@ -429,6 +711,14 @@ class SpecID::Sequest::PepXML
     string
   end
+  # given any kind of filename (from windows or whatever)
+  # returns the base of the filename with no file extension
+  def self.base_name_noext(file)
+    file.gsub!("\\", '/')
+    File.basename(file).sub(/\.[\w^\.]+$/, '')
+  end
 end # PepXML
 ##
@@ -461,6 +751,15 @@ class SpecID::Sequest::Params
         one,two = line.split @@param_re
         two,comment = two.split @@param_two_split
         hash[one] = two.rstrip
+        # it is necessary to add this break so that params files inside srf
+        # files can be read.  This will terminate the reading at the end of
+        # the file even though there are more lines
+        if line =~ /added to U/ || line =~ /digest_mass_range/## Will only work on bioworks 3.2 & 3.3 (bioworks 3.1 last line => Elastase/Tryp...)
+          break
+        end
+        if line =~ /digest_mass_range/  # there is no space in the srf params files
+          break
+        end
       else
         break
       end
@@ -468,17 +767,26 @@ class SpecID::Sequest::Params
     hash
   end
+  # returns self
+  def parse_handle(fh)
+    sequest_line = fh.gets #[SEQUEST]
+    @opts = grab_params(fh)
+    @opts["search_engine"] = "SEQUEST"
+    @mods = grab_params(fh)
+    ## this gets rid of the .hdr postfix on indexed databases
+    @opts["first_database_name"] = @opts["first_database_name"].sub(/\.hdr$/, '')
+    self
+  end
   ## parses file
   ## and drops the .hdr behind indexed fasta files
+  ## returns self
   def parse(file)
     File.open(file) do |fh|
-      sequest_line = fh.gets #[SEQUEST]
-      @opts = grab_params(fh)
-      @opts["search_engine"] = "SEQUEST"
-      @mods = grab_params(fh)
+      parse_handle(fh)
     end
-    ## this gets rid of the .hdr postfix on indexed databases
-    @opts["first_database_name"] = @opts["first_database_name"].sub(/\.hdr$/, '')
+    self
   end
   # returns( split_after, except_before)
@@ -569,6 +877,17 @@ class SpecID::Sequest::Params
     @opts["first_database_name"]
   end
+  # returns the appropriate aminoacid mass lookup table (in spec_id.rb SpecID::MONO or
+  # SpecID::AVG based on precursor_mass_type
+  def mass_table
+    case precursor_mass_type
+    when 'average'
+      SpecID::AVG
+    when 'monoisotopic'
+      SpecID::MONO
+    end
+  end
   # at least in Bioworks 3.2, the First number after the enzyme
   # is the indication of the enzymatic end stringency (required):
   #   1 = Fully enzymatic
@@ -628,7 +947,7 @@ class SpecID::Sequest::PepXML::SearchResult
   attr_accessor :search_hits
   # if block given, then search_hits set to return value
-  def initialize()
+  def initialize
     if block_given? ; @search_hits = yield
     else ; @search_hits = [] end
   end
@@ -646,13 +965,16 @@ class SpecID::Sequest::PepXML::SearchSummary
   attr_accessor :base_name
   attr_accessor :out_data_type
   attr_accessor :out_data
+  attr_accessor :modifications
   # A SearchDatabase object (responds to :local_path and :type)
   attr_accessor :search_database
   # if given a sequest params object, then will set the following attributes:
   # args is a hash of parameters
-  def initialize(params=nil, args=nil)
+  # modifications_string -> See Modifications
+  def initialize(params, modifications_string='', args=nil)
     @search_id = nil
     @params = params
+    @modifications = SpecID::Sequest::PepXML::Modifications.new(params, modifications_string)
     if args ; set_from_hash(args) end
   end
@@ -665,16 +987,304 @@ class SpecID::Sequest::PepXML::SearchSummary
     else ; '1' end
   end
   def to_pepxml
     element_xml(:search_summary, [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]) do
       search_database.to_pepxml +
         short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini]) +
+        @modifications.to_pepxml +
         @params.pepxml_parameters
     end
   end
 end
+class SpecID::Sequest::PepXML::Modifications
+  include SpecIDXML
+  # sequest params object
+  attr_accessor :params
+  # array holding AAModifications
+  attr_accessor :aa_mods
+  # array holding TerminalModifications
+  attr_accessor :term_mods
+  # a hash of all differential modifications present by aa_one_letter_symbol
+  # and special_symbol. This is NOT the mass difference but the total mass {
+  # 'M*' => 155.5, 'S@' => 190.3 }.  NOTE: Since the termini are dependent on
+  # the amino acid sequence, they are give the *differential* mass.  The
+  # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
+  # cterminus    ']' => 14.55 # nterminus
+  attr_accessor :masses_by_diff_mod_hash
+  # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
+  # values are the special_symbols
+  attr_accessor :mod_symbols_hash
+  # The modification symbols string looks like this:
+  # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
+  # ct is cterminal peptide (differential)
+  # nt is nterminal peptide (differential)
+  # the C is just cysteine
+  # will set_modifications and masses_by_diff_mod hash
+  def initialize(params, modification_symbols_string='')
+    @params = params
+    set_modifications(params, modification_symbols_string)
+  end
+  # set the masses_by_diff_mod and mod_symbols_hash from
+  def set_hashes(modification_symbols_string)
+    @mod_symbols_hash = {}
+    @masses_by_diff_mod = {}
+    if modification_symbols_string == nil || modification_symbols_string == ''
+      return nil
+    end
+    table = @params.mass_table
+    modification_symbols_string.split(/\)\s+\(/).each do |mod|
+      if mod =~ /\(?(\w{1,2})(.) (.[\d\.]+)\)?/
+        aa_as_sym = $1.to_sym,
+        @mod_symbols_hash[[aa_as_sym, $3.to_f]] = $2.dup
+        if $1 == 'ct' || $1 == 'nt'
+          @masses_by_diff_mod[$2] = $3.to_f
+        else
+          @masses_by_diff_mod[$1+$2] = $3.to_f + table[aa_as_sym]
+        end
+      end
+    end
+  end
+  # given a bare peptide (no end pieces) returns a ModificationInfo object
+  # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
+  # if there are no modifications, returns nil
+  def modification_info(peptide)
+    if @masses_by_diff_mod.size == 0
+      return nil
+    end
+    hash[:modified_peptide] = peptide.dup
+    hash = {}
+    hsh = @masses_by_diff_mod
+    table = @params.mass_table
+    h = table[:h]  # this? or h_plus ??
+    oh = table[:o] + h
+    ## only the termini can match a single char
+    if hsh.key? peptide[0,1]
+      # AA + H + differential_mod
+      hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
+      peptide.slice!( 1..-1 )
+    end
+    if hsh.key? peptide[-1,1]
+      # AA + OH + differential_mod
+      hash[:mod_cterm_mass] = table[peptide[-2,1].to_sym] + oh + hsh[peptide[-1,1]]
+      peptide.slice!( 0..-2 )
+    end
+    mod_array = []
+    (0...peptide.size).each do |i|
+      if hsh.key? peptide[i,2]
+        mod_array << [ i+1 , hsh[peptide[i,2]] ]
+      end
+    end
+    if mod_array.size > 0
+      hash[:mod_aminoacid_mass_array] = mod_array
+    end
+    if hash.size > 0
+      SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
+    else
+      nil
+    end
+  end
+  # 1. sets aa_mods and term_mods from a sequest params object
+  # 2. sets @params
+  # 3. sets @masses_by_diff_mod
+  def set_modifications(params, modification_symbols_string)
+    @params = params
+    set_hashes(modification_symbols_string)
+    ####################################
+    ## static mods
+    ####################################
+    static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
+    static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
+    params.mods.each do |k,v|
+      v_to_f = v.to_f
+      if v_to_f != 0.0
+        if k =~ /add_(\w)_/
+          static_mods << [$1.to_sym, v_to_f]
+        else
+          static_terminal_mods << [k, v_to_f]
+        end
+      end
+    end
+    aa_hash = params.mass_table
+    ## Create the static_mods objects
+    static_mods.map! do |mod|
+      hash = {
+        :aminoacid => mod[0].to_s,
+        :massdiff => mod[1].to_plus_minus_string,
+        :mass => aa_hash[mod[0]] + mod[1],
+        :variable => 'N',
+        :binary => 'Y',
+      }
+      SpecID::Sequest::PepXML::AAModification.new(hash)
+    end
+    ## Create the static_terminal_mods objects
+    static_terminal_mods.map! do |mod|
+      terminus = if mod[0] =~ /Cterm/ ; 'c'
+                 else                 ; 'n' # only two possible termini
+                 end
+      protein_terminus = case mod[0]
+                         when /Nterm_protein/ ; 'n'
+                         when /Cterm_protein/ ; 'c'
+                         else nil
+                         end
+      # create the hash
+      hash = {
+        :terminus => terminus,
+        :massdiff => mod[1].to_plus_minus_string,
+        :variable => 'N',
+        :description => mod[0],
+      }
+      hash[:protein_terminus] = protein_terminus if protein_terminus
+      SpecID::Sequest::PepXML::TerminalModification.new(hash)
+    end
+    #################################
+    # Variable Mods:
+    #################################
+    arr = params.diff_search_options.rstrip.split(/\s+/)
+    # [aa.to_sym, diff.to_f]
+    variable_mods = []
+    (0...arr.size).step(2) do |i|
+      if arr[i].to_f != 0.0
+        variable_mods << [arr[i+1].to_sym, arr[i].to_f]
+      end
+    end
+    variable_mods.map! do |mod|
+      hash = {
+        :aminoacid => mod[0].to_s,
+        :massdiff => mod[1].to_plus_minus_string,
+        :mass => aa_hash[mod[0]] + mod[1],
+        :variable => 'Y',
+        :binary => 'N',
+        :symbol => @mod_symbols_hash[mod],
+      }
+      SpecID::Sequest::PepXML::AAModification.new(hash)
+    end
+    #################################
+    # TERMINAL Variable Mods:
+    #################################
+    # These are always peptide, not protein termini (for sequest)
+    (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
+    to_add = []
+    if nterm_diff != 0.0
+      to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
+    end
+    if cterm_diff != 0.0
+      to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
+    end
+    variable_terminal_mods = to_add.map do |term, mssdiff, symb|
+      hash = {
+        :terminus => term,
+        :massdiff => mssdiff,
+        :variable => 'Y',
+        :symbol => symb,
+      }
+      SpecID::Sequest::PepXML::TerminalModification.new(hash)
+    end
+    #########################
+    # COLLECT THEM
+    #########################
+    @aa_mods = static_mods + variable_mods
+    @term_mods = static_terminal_mods + variable_terminal_mods
+  end
+  ## Generates the pepxml for static and differential amino acid mods based on
+  ## sequest object
+  def to_pepxml
+    st = ''
+    if @aa_mods
+      st << @aa_mods.map {|v| v.to_pepxml }.join
+    end
+    if @term_mods
+      st << @term_mods.map {|v| v.to_pepxml }.join
+    end
+    st
+  end
+end
+# Modified aminoacid, static or variable
+# unless otherwise stated, all attributes can be anything
+# Written out as an "aminoacid_modification" element (see to_pepxml).
+class SpecID::Sequest::PepXML::AAModification
+  include SpecIDXML
+  # The amino acid (one letter code)
+  attr_accessor :aminoacid
+  # Must be a string!!!!
+  # Mass difference with respect to unmodified aminoacid, must begin with
+  # either + (nonnegative) or - [e.g. +1.05446 or -2.3342]
+  # consider Numeric#to_plus_minus_string at top
+  attr_accessor :massdiff
+  # Mass of modified aminoacid
+  attr_accessor :mass
+  # Y if both modified and unmodified aminoacid could be present in the
+  # dataset, N if only modified aminoacid can be present
+  attr_accessor :variable
+  # whether modification can reside only at protein terminus (specified 'n',
+  # 'c', or 'nc')
+  attr_accessor :peptide_terminus
+  # Special symbol used by search engine to designate this modification
+  attr_accessor :symbol
+  # Y if each peptide must have only modified or unmodified aminoacid, N if a
+  # peptide may contain both modified and unmodified aminoacid
+  attr_accessor :binary
+  # hash:: optional hash of values, handed to instance_var_set_from_hash
+  def initialize(hash=nil)
+    instance_var_set_from_hash(hash) if hash # can use unless there are weird methods
+  end
+  # Returns the result of short_element_xml_from_instance_vars for the
+  # element name "aminoacid_modification"
+  def to_pepxml
+    short_element_xml_from_instance_vars("aminoacid_modification")
+  end
+end
+# Modified terminus, static or variable
+# Written out as a "terminal_modification" element (see to_pepxml).
+class SpecID::Sequest::PepXML::TerminalModification
+  include SpecIDXML
+  # n for N-terminus, c for C-terminus
+  attr_accessor :terminus
+  # Mass difference with respect to unmodified terminus
+  attr_accessor :massdiff
+  # Mass of modified terminus
+  attr_accessor :mass
+  # Y if both modified and unmodified terminus could be present in the
+  # dataset, N if only modified terminus can be present
+  attr_accessor :variable
+  # Special symbol used by search engine to designate this modification
+  attr_accessor :symbol
+  # whether modification can reside only at protein terminus (specified n or
+  # c)
+  attr_accessor :protein_terminus
+  # free text description of the modification
+  attr_accessor :description
+  # hash:: optional hash of values, handed to instance_var_set_from_hash
+  def initialize(hash=nil)
+    instance_var_set_from_hash(hash) if hash # can use unless there are weird methods
+  end
+  # Returns the result of short_element_xml_from_instance_vars for the
+  # element name "terminal_modification"
+  def to_pepxml
+    short_element_xml_from_instance_vars("terminal_modification")
+  end
+end
 class SpecID::Sequest::PepXML::SearchDatabase
   include SpecIDXML
   attr_accessor :local_path
@@ -708,7 +1318,15 @@ end
 class SpecID::Sequest::PepXML::SpectrumQuery
   include SpecIDXML
-  attr_accessor :spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :search_results
+  # basename_noext.first_scan.last_scan.charge
+  attr_accessor :spectrum
+  attr_accessor :start_scan
+  attr_accessor :end_scan
+  attr_accessor :precursor_neutral_mass
+  attr_accessor :index
+  attr_accessor :search_results
   # this is a string
   attr_accessor :assumed_charge
   attr_accessor :pepxml_version
@@ -803,6 +1421,10 @@ end
 # this responds to flatten (so that it won't flatten).
 class SpecID::Sequest::PepXML::SearchHit < Array
   include SpecIDXML
+  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
+  # num_tot_proteins = "Number of unique proteins in search database containing peptide"
   #attr_accessor 0:hit_rank, 1:peptide, 2:peptide_prev_aa, 3:peptide_next_aa, 4:protein, 5:num_tot_proteins, 6:num_matched_ions, 7:tot_num_ions, 8:calc_neutral_pep_mass, 9:massdiff, 10:num_tol_term, 11:num_missed_cleavages, 12:is_rejected
   #attr_accessor 13:deltacnstar
   #attr_accessor 14:xcorr, 15:deltacn, 16:spscore, 17:sprank
@@ -811,69 +1433,82 @@ class SpecID::Sequest::PepXML::SearchHit < Array
   ind_keys = {:hit_rank => 0, :peptide => 1, :peptide_prev_aa => 2, :peptide_next_aa => 3, :protein => 4, :num_tot_proteins => 5, :num_matched_ions => 6, :tot_num_ions => 7, :calc_neutral_pep_mass => 8, :massdiff => 9, :num_tol_term => 10, :num_missed_cleavages => 11, :is_rejected => 12, :deltacnstar  => 13, :xcorr => 14, :deltacn => 15, :spscore => 16, :sprank => 17}
   @@methods = ind_keys.keys
   def hit_rank ; self[0] end ; def hit_rank=(oth) ; self[0] = oth end
-def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
-def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
-def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
-def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
-def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
-def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
-def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
-def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
-def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
-def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
-def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
-def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
-def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
-def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
-def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
-def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
-def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
-@@arr_size = ind_keys.size
-ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
-ind_keys.merge!(ind_keys_w_eq)
-ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
-# These are all search_score elements:
-# 1 if there is no second ranked hit, 0 otherwise
-def initialize(hash=nil)
-  super(@@arr_size)
-  self[0,18] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank]]
-  self
-  #if hash ; set_from_hash(hash) end
-end
+  def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
+  def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
+  def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
+  def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
+  def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
+  def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
+  def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
+  def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
+  def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
+  def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
+  def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
+  def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
+  def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
+  def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
+  def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
+  def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
+  def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
+  @@arr_size = ind_keys.size
+  ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
+  ind_keys.merge!(ind_keys_w_eq)
+  ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
+  # These are all search_score elements:
+  # 1 if there is no second ranked hit, 0 otherwise
-# Returns prev, peptide, next from sequence.  Parse errors return
-# nil,nil,nil
-#   R.PEPTIDE.A  # -> R, PEPTIDE, A
-#   R.PEPTIDE.-  # -> R, PEPTIDE, -
-#   PEPTIDE.A    # -> -, PEPTIDE, A
-#   A.PEPTIDE    # -> A, PEPTIDE, -
-#   PEPTIDE      # -> nil,nil,nil
-def self.split_sequence(val)
-  peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
-  pieces = val.split(".")
-  case pieces.size
-  when 3
-    peptide_prev_aa, peptide, peptide_next_aa = *pieces
-  when 2
-    if pieces[0].size > 1  ## N termini
-      peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
-    else  ## C termini
-      peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
-    end
-  when 1  ## this must be a parse error!
-    peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
-  when 0
-    peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
-  end
-  return peptide_prev_aa, peptide, peptide_next_aa
-end
+  def initialize(hash=nil)
+    super(@@arr_size)
+    self[0,18] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank]]
+    self
+    #if hash ; set_from_hash(hash) end
+  end
+  # remove_non_amino_acids && split_sequence
+  def self.prepare_sequence(val)
+    nv = remove_non_amino_acids(val)
+    split_sequence(nv)
+  end
-def inspect
-    "#<SearchHit #{@@methods.map do |m| "#{m}:#{self.send(m)}" end.join(" ")}>"
+  # Returns prev, peptide, next from sequence.  Parse errors return
+  # nil,nil,nil
+  #   R.PEPTIDE.A  # -> R, PEPTIDE, A
+  #   R.PEPTIDE.-  # -> R, PEPTIDE, -
+  #   PEPTIDE.A    # -> -, PEPTIDE, A
+  #   A.PEPTIDE    # -> A, PEPTIDE, -
+  #   PEPTIDE      # -> nil,nil,nil
+  def self.split_sequence(val)
+    peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
+    pieces = val.split('.')
+    case pieces.size
+    when 3
+      peptide_prev_aa, peptide, peptide_next_aa = *pieces
+    when 2
+      if pieces[0].size > 1  ## N termini
+        peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
+      else  ## C termini
+        peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
+      end
+    when 1  ## this must be a parse error!
+      peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
+    when 0
+      peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
+    end
+    return peptide_prev_aa, peptide, peptide_next_aa
+  end
+  # removes nonstandard chars with Non_standard_amino_acid_char_re
+  # preserves A-Z, '.' and '-'
+  def self.remove_non_amino_acids(sequence)
+    sequence.gsub(Non_standard_amino_acid_char_re, '')
+  end
+  def inspect
+    var = @@methods.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
+    "#<SearchHit #{var}>"
   end
   # requires Params object and full sequence (with heads and tails)
@@ -924,3 +1559,65 @@ def inspect
 end
+# Positions and masses of modifications
+class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
+  include SpecIDXML
+  ## Should be something like this:
+  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
+  #   <mod_aminoacid_mass position=" " mass=" "/>
+  # </modification_info>
+  # Mass of modified N terminus
+  attr_accessor :mod_nterm_mass
+  # Mass of modified C terminus
+  attr_accessor :mod_cterm_mass
+  # Peptide sequence (with indicated modifications).  I'm assuming that the
+  # native sequest indicators are OK here
+  attr_accessor :modified_peptide
+  ## A few main types:
+  # this should be an array of arrays: [[position, modified_mass], ...]
+  # position ranges from 1 to peptide length
+  attr_accessor :mod_aminoacid_mass_array
+  def initialize(hash=nil)
+    instance_var_set_from_hash(hash) # NOTE(review): no 'if hash' guard here, unlike the sibling PepXML classes - confirm a nil hash is handled
+  end
+  # Will escape any xml special chars in modified_peptide
+  def to_pepxml
+    ## Collect the modifications:
+    mod_strings = []
+    if @mod_aminoacid_mass_array
+      mod_strings = @mod_aminoacid_mass_array.map do |ar|
+        "position=\"#{ar[0]}\" mass=\"#{ar[1]}\""
+      end
+    end
+    ## Create the attribute string:
+    att_parts = []
+    if @mod_nterm_mass
+      att_parts << "mod_nterm_mass=\"#{@mod_nterm_mass}\""
+    end
+    if @mod_cterm_mass
+      att_parts << "mod_cterm_mass=\"#{@mod_cterm_mass}\""
+    end
+    if @modified_peptide
+      att_parts << "modified_peptide=\"#{escape_special_chars(@modified_peptide)}\""
+    end
+    element_xml_and_att_string('modification_info', att_parts.join(" ")) do
+      mod_strings.map {|st| short_element_xml_and_att_string('mod_aminoacid_mass', st) }.join
+    end
+  end
+  ## Example element:
+  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
+  # <mod_aminoacid_mass position="2" mass="545.7160"/>
+  # <mod_aminoacid_mass position="3" mass="147.1926"/>
+  # </modification_info>
+end