RubyGems - mspire - Versions diffs - 0.1.7 → 0.2.0 - Mend

mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

data/Rakefile +41 -14
data/bin/bioworks2excel.rb +1 -1
data/bin/bioworks_to_pepxml.rb +46 -59
data/bin/fasta_shaker.rb +1 -1
data/bin/filter.rb +6 -0
data/bin/find_aa_freq.rb +23 -0
data/bin/id_precision.rb +3 -2
data/bin/mzxml_to_lmat.rb +2 -1
data/bin/pepproph_filter.rb +1 -1
data/bin/precision.rb +1 -1
data/bin/protein_summary.rb +2 -451
data/bin/raw_to_mzXML.rb +55 -0
data/bin/srf_group.rb +26 -0
data/changelog.txt +7 -0
data/lib/align.rb +3 -3
data/lib/fasta.rb +6 -1
data/lib/gi.rb +9 -4
data/lib/roc.rb +2 -0
data/lib/sample_enzyme.rb +2 -1
data/lib/spec/mzxml/parser.rb +2 -43
data/lib/spec/mzxml.rb +65 -2
data/lib/spec_id/aa_freqs.rb +10 -7
data/lib/spec_id/bioworks.rb +67 -87
data/lib/spec_id/filter.rb +794 -0
data/lib/spec_id/precision.rb +29 -36
data/lib/spec_id/proph.rb +5 -3
data/lib/spec_id/protein_summary.rb +459 -0
data/lib/spec_id/sequest.rb +323 -271
data/lib/spec_id/srf.rb +189 -135
data/lib/spec_id.rb +276 -227
data/lib/spec_id_xml.rb +101 -0
data/lib/toppred.rb +18 -0
data/script/degenerate_peptides.rb +47 -0
data/script/filter-peps.rb +5 -1
data/test/tc_align.rb +1 -1
data/test/tc_bioworks.rb +25 -22
data/test/tc_bioworks_to_pepxml.rb +37 -4
data/test/tc_fasta.rb +3 -1
data/test/tc_fasta_shaker.rb +8 -6
data/test/tc_filter.rb +203 -0
data/test/tc_gi.rb +6 -9
data/test/tc_id_precision.rb +31 -0
data/test/tc_mzxml.rb +8 -6
data/test/tc_peptide_parent_times.rb +2 -1
data/test/tc_precision.rb +1 -1
data/test/tc_proph.rb +5 -5
data/test/tc_protein_summary.rb +36 -13
data/test/tc_sequest.rb +78 -33
data/test/tc_spec_id.rb +128 -6
data/test/tc_srf.rb +84 -38
metadata +67 -62
data/bin/fasta_cat.rb +0 -39
data/bin/fasta_cat_mod.rb +0 -59
data/bin/fasta_mod.rb +0 -57
data/bin/filter_spec_id.rb +0 -365
data/bin/raw2mzXML.rb +0 -21
data/script/gen_database_searching.rb +0 -258

data/lib/spec_id/sequest.rb CHANGED

@@ -7,6 +7,7 @@ require 'spec_id/bioworks'
 require 'instance_var_set_from_hash'
 require 'spec/msrun'
 require 'spec_id/srf'
+require 'fileutils'
 class Numeric
   # returns a string with a + or - on the front
@@ -75,10 +76,10 @@ end
-module SpecID::Sequest; end
-class SpecID::Sequest::PepXML; end
+module Sequest; end
+class Sequest::PepXML; end
-class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
+class Sequest::PepXML::MSMSPipelineAnalysis
   include SpecIDXML
   # Version 1.2.3
   attr_writer :date
@@ -106,7 +107,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
   def date
     if @date ; @date
     else
-      case SpecID::Sequest::PepXML.pepxml_version
+      case Sequest::PepXML.pepxml_version
       when 18 ;  tarr = Time.now.to_a ; tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
       when 0 ; Time.new.to_s
       end
@@ -132,7 +133,7 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
   end
   def to_pepxml
-    case SpecID::Sequest::PepXML.pepxml_version
+    case Sequest::PepXML.pepxml_version
     when 0
       element_xml(:msms_pipeline_analysis, [:date, :summary_xml]) do
         @msms_run_summary.to_pepxml
@@ -142,13 +143,13 @@ class SpecID::Sequest::PepXML::MSMSPipelineAnalysis
         @msms_run_summary.to_pepxml
       end
     else
-      abort "Don't know how to deal with version: #{SpecID::Sequest::PepXML.pepxml_version}"
+      abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
     end
   end
 end
-class SpecID::Sequest::PepXML::MSMSRunSummary
+class Sequest::PepXML::MSMSRunSummary
   include SpecIDXML
   # the version of TPP you are using (determines xml output)
@@ -184,7 +185,7 @@ class SpecID::Sequest::PepXML::MSMSRunSummary
   end
   def to_pepxml
-    case SpecID::Sequest::PepXML.pepxml_version
+    case Sequest::PepXML.pepxml_version
     when 18
       element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
         sample_enzyme.to_pepxml +
@@ -210,7 +211,7 @@ end
-class SpecID::Sequest::PepXML
+class Sequest::PepXML
   include SpecIDXML
   ## CREATE a default version for the entire class
@@ -292,21 +293,22 @@ class SpecID::Sequest::PepXML
   # objects.  Ideally, we'd like these attributes to reside elsewhere, but for
   # memory concerns, this is best for now.
   def self._prot_num_and_first_prot_by_pep(pep_array)
-    pep_array.hash_by(:sequence).each do |seq, pep_arr|
-      prots = pep_arr.collect { |pep| pep.prot }
-      prots.uniq!
-      _size = prots.size
+    pep_array.hash_by(:aaseq).each do |aasq, pep_arr|
+      prts = []
+      pep_arr.each { |pep| prts.push( *(pep.prots) ) }
+      prts.uniq!
+      _size = prts.size
       pep_arr.each do |pep|
         pep._num_prots = _size.to_s
-        pep._first_prot = prots.first
+        pep._first_prot = prts.first
       end
     end
   end
-  Default_Options = {
-    :out_path => nil,
-    :backup_db_path => '/project/marcotte/marcotte/ms/database',
+Default_Options = {
+    :out_path => '.',
+    #:backup_db_path => '.',
     # a PepXML option
     :pepxml_version => DEF_VERSION,
     ## MSMSRunSummary options:
@@ -314,15 +316,18 @@ class SpecID::Sequest::PepXML
     # or create your own SampleEnzyme object
     :sample_enzyme => 'trypsin',
     :ms_manufacturer => 'ThermoFinnigan',
-    :ms_model => 'LCQ Deca XP',
+    :ms_model => 'LCQ Deca XP Plus',
     :ms_ionization => 'ESI',
     :ms_mass_analyzer => 'Ion Trap',
     :ms_detector => 'UNKNOWN',
+    :ms_data => '.',      # path to ms data files (raw or mzxml)
     :raw_data_type => "raw",
     :raw_data => ".mzXML", ## even if you don't have it?
     ## SearchSummary options:
     :out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
-    :out_data => ".tgz" ## may be srf??
+    :out_data => ".tgz", ## may be srf??
+    :copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
+    :print => false, # print the objects to file
   }
   # will dynamically set :ms_model and :ms_mass_analyzer from srf info
@@ -330,23 +335,23 @@ class SpecID::Sequest::PepXML
   # and LCQ Deca XP
   # See SRF::Sequest::PepXML::Default_Options hash for defaults
   # unless given, the out_path will be given as the path of the srf_file
-  def self.new_from_srf(srf_file, opts={})
+  # srf may be an object or a filename
+  def self.new_from_srf(srf, opts={})
     opts = Default_Options.merge(opts)
-    ## set the outpath
-    out_path = opts.delete(:out_path)
-    unless out_path
-      out_path = File.dirname(srf_file)
+    ## read the srf file
+    if srf.is_a? String
+      srf = SRF.new(srf)
     end
-    ## read the srf file
-    srf = SRF.new(srf_file)
+    ## set the outpath
+    out_path = opts.delete(:out_path)
     params = srf.params
     ## check to see if we need backup_db
     backup_db_path = opts.delete(:backup_db_path)
-    unless File.exist? params.database
+    if !File.exist?(params.database) && backup_db_path
       params.database_path = backup_db_path
     end
@@ -374,24 +379,47 @@ class SpecID::Sequest::PepXML
     ## Create the search summary:
     search_summary_options = {
-      :search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params),
+      :search_database => Sequest::PepXML::SearchDatabase.new(params),
       :base_name => full_base_name_no_ext,
       :out_data_type => out_data_type,
       :out_data => out_data
     }
-    opts[:search_summary] = SpecID::Sequest::PepXML::SearchSummary.new( params, search_summary_options)
+    modifications_string = srf.header.modifications
+    search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
     ## Create the SampleEnzyme object if necessary
     unless opts[:sample_enzyme].is_a? SampleEnzyme
       opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
     end
-    ## Create the pepxml obj
-    pepxml_obj = SpecID::Sequest::PepXML.new(ppxml_version, params)
+    ## Create the pepxml obj and top level objects
+    pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
+    pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
+    pepxml_obj.msms_pipeline_analysis = pipeline
+    pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
+    pipeline.msms_run_summary.search_summary = search_summary
+    modifications_obj = search_summary.modifications
     ## name some common variables we'll need
     h_plus = pepxml_obj.h_plus
     avg_parent = pepxml_obj.avg_parent
+    ## COPY MZXML FILES IF NECESSARY
+    if opts[:copy_mzxml]
+      mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
+      to_copy = Spec::MzXML.file_to_mzxml(mzxml_pathname_noext)
+      if to_copy
+        FileUtils.cp to_copy, out_path
+      else
+        puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
+        puts "Perhaps you need to specifiy the location of the raw data"
+        puts "or need an mzXML converter (readw.exe or t2x)"
+        exit
+      end
+    end
     #######################################################################
     # CREATE the spectrum_queries_ar
     #######################################################################
@@ -420,6 +448,8 @@ class SpecID::Sequest::PepXML
         deltacnstar = '1'
       end
       ## mass calculations:
       precursor_neutral_mass = dta_file.mh - h_plus
       calc_neutral_pep_mass = top_hit[0] - h_plus
@@ -428,6 +458,9 @@ class SpecID::Sequest::PepXML
       else ; massdiff = massdiff.to_s end
       (start_scan, end_scan, charge) = srf_index[i]
       sq_hash = {
         :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
         :start_scan => start_scan,
@@ -438,9 +471,13 @@ class SpecID::Sequest::PepXML
         :index => files_with_hits_index,
       }
+      spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
+      sequence = top_hit[8]
       #  NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
       ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
-      (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_hit[8])
+      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
       #  ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
       sh_hash = {
@@ -448,14 +485,14 @@ class SpecID::Sequest::PepXML
         :peptide => pepseq,
         :peptide_prev_aa => prevaa,
         :peptide_next_aa => nextaa,
-        :protein => top_hit[9].split(" ").first,
-        :num_tot_proteins => top_hit[10],
+        :protein => top_hit[9].first.reference.split(" ").first,
+        :num_tot_proteins => top_hit[9].size,
         :num_matched_ions => top_hit[6],
         :tot_num_ions => top_hit[7],
         :calc_neutral_pep_mass => calc_neutral_pep_mass,
         :massdiff => massdiff,
-        :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_hit[8]),
-        :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_hit[8]),
+        :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
+        :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
         :is_rejected => '0',
         # These are search score attributes:
         :xcorr => top_hit[3],
@@ -463,51 +500,88 @@ class SpecID::Sequest::PepXML
         :deltacnstar => deltacnstar,
         :spscore => top_hit[2],
         :sprank => top_hit[5],
+        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
       }
+      search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
-      spectrum_queries_arr[files_with_hits_index] = SpecID::Sequest::PepXML::SpectrumQuery.new(sq_hash) do
-        search_result = SpecID::Sequest::PepXML::SearchResult.new do
-          [ SpecID::Sequest::PepXML::SearchHit.new(sh_hash) ] # there can be multiple hits
-        end # SearchResult
-        [search_result] # can be multiple
-      end
+      search_result = Sequest::PepXML::SearchResult.new
+      search_result.search_hits = [search_hit]
+      spectrum_query.search_results = [search_result]
+      spectrum_queries_arr[files_with_hits_index] = spectrum_query
     end
     spectrum_queries_arr.compact!
-    #######################################################################
-    # ADD the pipeline analysis
-    #######################################################################
-    pipeline = SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'}) do
-      SpecID::Sequest::PepXML::MSMSRunSummary.new(opts) { spectrum_queries_arr }
-    end
-    pepxml_obj.msms_pipeline_analysis = pipeline
+    pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
     pepxml_obj.base_name = pipeline.msms_run_summary.base_name
+    pipeline.msms_run_summary.spectrum_queries =  spectrum_queries_arr
     pepxml_obj
   end
+  # takes an .srg or bioworks.xml file
+  # if possible, ensures that an mzXML file is present for each pepxml file
+  # :print => true, will print files
+  def self.set_from_bioworks(bioworks_file, opts={})
+    opts = Default_Options.merge(opts)
+    ## Create the out_path directory if necessary
+    unless File.exist? opts[:out_path]
+      FileUtils.mkpath(opts[:out_path])
+    end
+    unless File.directory? opts[:out_path]
+      abort "#{opts[:out_path]} must be a directory!"
+    end
+    spec_id = SpecID.new(bioworks_file)
+    pepxml_objs =
+    if spec_id.is_a? Bioworks
+      abort("must have opts[:params] set!") unless opts[:params]
+      set_from_bioworks_xml(bioworks_file, opts[:params], opts)
+    elsif spec_id.is_a? SRFGroup
+      spec_id.srfs.map do |srf|
+        new_from_srf(srf, opts)
+      end
+    else
+      abort "invalid object"
+    end
+    if opts[:print]
+      pepxml_objs.each do |obj|
+        obj.to_pepxml(obj.base_name + ".xml")
+      end
+    end
+    pepxml_objs
+  end
   # Takes bioworks 3.2/3.3 xml output (with no filters)
   # Returns a list of PepXML objects
-  # msdata = path to mzXML files (or .timeIndex files) (or @TODO: path to sqt file(s))
   # params = sequest.params file
   # bioworks = bioworks.xml exported multi-consensus view file
   # pepxml_version = 0 for tpp 1.2.3
   # pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
-  def self.set_from_bioworks(params, bioworks, msdata, out_path, pepxml_version=18, sample_enzyme='trypsin', ms_manufacturer='ThermoFinnigan', ms_model='LCQ Deca XP Plus', ms_ionization='ESI', ms_mass_analyzer='Ion Trap', ms_detector='UNKNOWN', raw_data_type="raw", raw_data=".mzXML", out_data_type="out", out_data=".tgz")
+  def self.set_from_bioworks_xml(bioworks, params, opts={})
+    opts = Default_Options.merge(opts)
+    pepxml_version, sample_enzyme, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)
+    unless out_path
+      out_path = '.'
+    end
     supported_versions = [0,18]
-    unless supported_versions.include?(pepxml_version)
+    unless supported_versions.include?(opts[:pepxml_version])
       abort "pepxml_version: #{pepxml_version} not currently supported.  Current support is for versions #{supported_versions.join(', ')}"
     end
     ## Turn params and bioworks_obj into objects if necessary:
     # Params:
-    if params.class == SpecID::Sequest::Params  # OK!
-    elsif params.class == String ; params = SpecID::Sequest::Params.new(params)
+    if params.class == Sequest::Params  # OK!
+    elsif params.class == String ; params = Sequest::Params.new(params)
     else                         ; abort "Don't recognize #{params} as object or string!"
     end
     # Bioworks:
-    if bioworks.class == SpecID::Bioworks  # OK!
+    if bioworks.class == Bioworks  # OK!
     elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
     else                           ; abort "Don't recognize #{bioworks} as object or string!"
     end
@@ -516,39 +590,98 @@ class SpecID::Sequest::PepXML
     ## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
     ## bioworks
-    #bioworks.peps.each { |pep| if pep.class != SpecID::Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
+    #bioworks.peps.each { |pep| if pep.class != Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }
+    ## check to see if we need backup_db
+    backup_db_path = opts.delete(:backup_db_path)
+    if !File.exist?(params.database) && backup_db_path
+      params.database_path = backup_db_path
+    end
     ## Start
     split_bio_objs = []
-    ## Create a hash by pep object containing num_tot_proteins
-    ## This is only valid if all hits are present (no previous thresholding)
-    self._prot_num_and_first_prot_by_pep(bioworks.peps)
     ## (num_prots_by_pep, prot_by_pep) =
     #num_prots_by_pep.each do |k,v| puts "k: #{k} v: #{v}\n"; break end ; prot_by_pep.each do |k,v| puts "k: #{k} v: #{v}" ; break end ; abort "HERE"
+    modifications_string = bioworks.modifications
+    search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
+    modifications_obj = search_summary.modifications
     ## Create a hash of spectrum_query arrays by filename (this very big block):
     spectrum_queries_by_base_name = {}
-    pepxml_objs_by_base_name = {}
     # Hash by the filenames to split into filenames:
-    bioworks.peps.hash_by(:base_name).each do |base_name, pep_arr|
+    bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|
+      pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
+      full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
+      case pepxml_version
+      when 18
+        pipeline =  Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
+        msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
+          :base_name => full_base_name_no_ext,
+          :ms_manufacturer => ms_manufacturer,
+          :ms_model => ms_model,
+          :ms_ionization => ms_ionization,
+          :ms_mass_analyzer => ms_mass_analyzer,
+          :ms_detector => ms_detector,
+          :raw_data_type => raw_data_type,
+          :raw_data => raw_data,
+          :sample_enzyme => SampleEnzyme.new(sample_enzyme),
+          :search_summary => search_summary,
+        })
+        pipeline.msms_run_summary = msms_run_summary
+        pepxml_obj.msms_pipeline_analysis = pipeline
+        pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name =  full_base_name_no_ext
+        pepxml_obj.base_name = full_base_name_no_ext
+        pepxml_obj
+      when 0
+        ## @TODO: NEED TO REVAMP THIS:
+        #        Sequest::PepXML.new(pepxml_version).set_from_hash({
+        #          :params => params,
+        #          :search_results => spectrum_queries_arr,
+        #          :base_name => self.make_base_name( File.expand_path(out_path), base_name),
+        #          :search_engine => params.search_engine,
+        #          :database => params.database,
+        #          :raw_data_type => "mzXML",
+        #          :raw_data => ".mzXML",
+        #          :out_data_type => "out",
+        #          :out_data => ".tgz",
+        #          :sample_enzyme => params.enzyme,
+        #        })
+      end
+      # Create a hash by pep object containing num_tot_proteins
+      # This is only valid if all hits are present (no previous thresholding)
+      # Since out2summary only acts on one folder at a time,
+      # we should only do it for one folder at a time! (that's why we do this
+      # here instead of globally)
+      self._prot_num_and_first_prot_by_pep(pep_arr)
       prec_mz_arr = nil
       case x = bioworks.version
       when /3.2/
         calc_prec_by = :prec_mz_arr
         # get the precursor_mz array for this filename
-        inner__full_base_name_no_ext = File.join(msdata, base_name)
-        prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(inner__full_base_name_no_ext)
+        prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(File.join(ms_data, base_name))
       when /3.3/
         calc_prec_by = :deltamass
       else
         abort "invalid BioworksBrowser version: #{x}"
       end
-      pepxml_obj = SpecID::Sequest::PepXML.new(pepxml_version, params)
-      pepxml_objs_by_base_name[base_name] = pepxml_obj
+      if opts[:copy_mzxml]
+        to_copy = Spec::MzXML.file_to_mzxml(File.join(ms_data, base_name))
+        if to_copy
+          FileUtils.cp to_copy, out_path
+        end
+      end
       spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|
@@ -561,9 +694,9 @@ class SpecID::Sequest::PepXML
         case calc_prec_by
         when :prec_mz_arr
-          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
+          precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
         when :deltamass
-          precursor_neutral_mass = SpecID::Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
+          precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
         end
         calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
@@ -581,98 +714,58 @@ class SpecID::Sequest::PepXML
         end
         # Create the nested structure of queries{results{hits}}
         # (Ruby's blocks work beautifully for things like this)
-        spec_query = SpecID::Sequest::PepXML::SpectrumQuery.new({
+        spec_query = Sequest::PepXML::SpectrumQuery.new({
           :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
           :start_scan => top_pep.first_scan,
           :end_scan => top_pep.last_scan,
           :precursor_neutral_mass => precursor_neutral_mass.to_s,
           :assumed_charge => top_pep.charge,
           :pepxml_version => pepxml_version,
-        }) do
-          search_result = SpecID::Sequest::PepXML::SearchResult.new do
-            ## Calculate some interdependent values;
-            # NOTE: the bioworks mass is really M+H if two or more scans went
-            # into the search_hit; calc_neutral_pep_mass is simply the avg of
-            # precursor masses adjusted to be neutral
-            (prevaa, pepseq, nextaa) = SpecID::Sequest::PepXML::SearchHit.prepare_sequence(top_pep.sequence)
-            (num_matched_ions, tot_num_ions) = SpecID::Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
-            search_hit = SpecID::Sequest::PepXML::SearchHit.new({
-              :hit_rank => "1",
-              :peptide => pepseq,
-              :peptide_prev_aa => prevaa,
-              :peptide_next_aa => nextaa,
-              :protein => top_pep._first_prot.reference.split(" ").first,
-              :num_tot_proteins => top_pep._num_prots,
-              :num_matched_ions => num_matched_ions,
-              :tot_num_ions => tot_num_ions,
-              :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
-              :massdiff => massdiff,
-              :num_tol_term => SpecID::Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
-              :num_missed_cleavages => SpecID::Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
-              :is_rejected => "0",
-              # These are search score attributes:
-              :xcorr => top_pep.xcorr,
-              :deltacn => top_pep.deltacn,
-              :deltacnstar => deltacnstar,
-              :spscore => top_pep.sp,
-              :sprank => top_pep.rsp,
-            })
-            [search_hit]   # there can be multiple search hits
-          end # SearchResult
-          [search_result]  # can be multiple search_results
-        end # SpectrumQuery
-      end # Collects the spectrum queries
+        })
+        search_result = Sequest::PepXML::SearchResult.new
+        ## Calculate some interdependent values;
+        # NOTE: the bioworks mass is reallyf M+H if two or more scans went
+        # into the search_hit; calc_neutral_pep_mass is simply the avg of
+        # precursor masses adjusted to be neutral
+        (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
+        (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
+        search_hit = Sequest::PepXML::SearchHit.new({
+          :hit_rank => "1",
+          :peptide => pepseq,
+          :peptide_prev_aa => prevaa,
+          :peptide_next_aa => nextaa,
+          :protein => top_pep._first_prot.reference.split(" ").first,
+          :num_tot_proteins => top_pep._num_prots,
+          :num_matched_ions => num_matched_ions,
+          :tot_num_ions => tot_num_ions,
+          :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
+          :massdiff => massdiff,
+          :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
+          :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
+          :is_rejected => "0",
+          # These are search score attributes:
+          :xcorr => top_pep.xcorr,
+          :deltacn => top_pep.deltacn,
+          :deltacnstar => deltacnstar,
+          :spscore => top_pep.sp,
+          :sprank => top_pep.rsp,
+          :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
+        })
+        search_result.search_hits = [search_hit]   # there can be multiple search hits
+        spec_query.search_results = [search_result]  # can be multiple search_results
+        spec_query
+      end
       # create an index by spectrum as results end up typically in out2summary
       # (I really dislike this order, however)
       spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
       spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
-      spectrum_queries_by_base_name[base_name] = spectrum_queries_ar
-    end
-    modifications_string = bioworks.modifications
-    spectrum_queries_by_base_name.collect do |base_name, spectrum_queries_ar|
-      case pepxml_version
-      when 18
-        pipeline =  SpecID::Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'}) do
-          full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)
-          SpecID::Sequest::PepXML::MSMSRunSummary.new({
-            :base_name => full_base_name_no_ext,
-            :ms_manufacturer => ms_manufacturer,
-            :ms_model => ms_model,
-            :ms_ionization => ms_ionization,
-            :ms_mass_analyzer => ms_mass_analyzer,
-            :ms_detector => ms_detector,
-            :raw_data_type => raw_data_type,
-            :raw_data => raw_data,
-            :sample_enzyme => SampleEnzyme.new(sample_enzyme),
-            :search_summary => SpecID::Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => SpecID::Sequest::PepXML::SearchDatabase.new(params), :base_name => full_base_name_no_ext, :out_data_type => out_data_type, :out_data => out_data}),
-          }) { spectrum_queries_ar }
-        end
-        pepxml_obj = pepxml_objs_by_base_name[base_name]
-        pepxml_obj.msms_pipeline_analysis = pipeline
-        pepxml_obj.base_name = pipeline.msms_run_summary.base_name
-        pepxml_obj
-      when 0
-        ## @TODO: NEED TO REVAMP THIS:
-        #        SpecID::Sequest::PepXML.new(pepxml_version).set_from_hash({
-        #          :params => params,
-        #          :search_results => spectrum_queries_arr,
-        #          :base_name => self.make_base_name( File.expand_path(out_path), base_name),
-        #          :search_engine => params.search_engine,
-        #          :database => params.database,
-        #          :raw_data_type => "mzXML",
-        #          :raw_data => ".mzXML",
-        #          :out_data_type => "out",
-        #          :out_data => ".tgz",
-        #          :sample_enzyme => params.enzyme,
-        #        })
-      end
-    end # collects the pepxml objects
+      pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
+      pepxml_obj
+    end ## collects pepxml_objs
   end
   def summary_xml
@@ -724,7 +817,7 @@ end # PepXML
 ##
 # In the future, this guy should accept any version of bioworks params file
 # and spit out any param queried.
-class SpecID::Sequest::Params
+class Sequest::Params
   include SpecIDXML
   # current attributes supported are:
@@ -941,7 +1034,7 @@ class SpecID::Sequest::Params
 end
-class SpecID::Sequest::PepXML::SearchResult
+class Sequest::PepXML::SearchResult
   include SpecIDXML
   # an array of search_hits
   attr_accessor :search_hits
@@ -959,7 +1052,7 @@ class SpecID::Sequest::PepXML::SearchResult
   end
 end
-class SpecID::Sequest::PepXML::SearchSummary
+class Sequest::PepXML::SearchSummary
   include SpecIDXML
   attr_accessor :params
   attr_accessor :base_name
@@ -974,7 +1067,7 @@ class SpecID::Sequest::PepXML::SearchSummary
   def initialize(params, modifications_string='', args=nil)
     @search_id = nil
     @params = params
-    @modifications = SpecID::Sequest::PepXML::Modifications.new(params, modifications_string)
+    @modifications = Sequest::PepXML::Modifications.new(params, modifications_string)
     if args ; set_from_hash(args) end
   end
@@ -999,7 +1092,7 @@ class SpecID::Sequest::PepXML::SearchSummary
 end
-class SpecID::Sequest::PepXML::Modifications
+class Sequest::PepXML::Modifications
   include SpecIDXML
   # sequest params object
@@ -1032,20 +1125,27 @@ class SpecID::Sequest::PepXML::Modifications
   # set the masses_by_diff_mod and mod_symbols_hash from
   def set_hashes(modification_symbols_string)
     @mod_symbols_hash = {}
     @masses_by_diff_mod = {}
-    if modification_symbols_string == nil || modification_symbols_string == ''
+    if (modification_symbols_string == nil || modification_symbols_string == '')
       return nil
     end
     table = @params.mass_table
     modification_symbols_string.split(/\)\s+\(/).each do |mod|
-      if mod =~ /\(?(\w{1,2})(.) (.[\d\.]+)\)?/
-        aa_as_sym = $1.to_sym,
-        @mod_symbols_hash[[aa_as_sym, $3.to_f]] = $2.dup
+      if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
         if $1 == 'ct' || $1 == 'nt'
-          @masses_by_diff_mod[$2] = $3.to_f
+          mass_diff = $3.to_f
+          @masses_by_diff_mod[$2] = mass_diff
+          @mod_symbols_hash[[$1, mass_diff]] = $2.dup
         else
-          @masses_by_diff_mod[$1+$2] = $3.to_f + table[aa_as_sym]
+          symbol_string = $2.dup
+          mass_diff = $3.to_f
+          $1.split('').each do |aa|
+            aa_as_sym = aa.to_sym
+            @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
+            @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
+          end
         end
       end
     end
@@ -1058,8 +1158,8 @@ class SpecID::Sequest::PepXML::Modifications
     if @masses_by_diff_mod.size == 0
       return nil
     end
-    hash[:modified_peptide] = peptide.dup
     hash = {}
+    hash[:modified_peptide] = peptide.dup
     hsh = @masses_by_diff_mod
     table = @params.mass_table
     h = table[:h]  # this? or h_plus ??
@@ -1068,12 +1168,13 @@ class SpecID::Sequest::PepXML::Modifications
     if hsh.key? peptide[0,1]
       # AA + H + differential_mod
       hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
-      peptide.slice!( 1..-1 )
+      peptide = peptide[1...(peptide.size)]
     end
-    if hsh.key? peptide[-1,1]
+    if hsh.key? peptide[(peptide.size-1),1]
       # AA + OH + differential_mod
-      hash[:mod_cterm_mass] = table[peptide[-2,1].to_sym] + oh + hsh[peptide[-1,1]]
+      hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
       peptide.slice!( 0..-2 )
+      peptide = peptide[0...(peptide.size-1)]
     end
     mod_array = []
     (0...peptide.size).each do |i|
@@ -1084,8 +1185,8 @@ class SpecID::Sequest::PepXML::Modifications
     if mod_array.size > 0
       hash[:mod_aminoacid_mass_array] = mod_array
     end
-    if hash.size > 0
-      SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
+    if hash.size > 1  # if there is more than just the modified peptide there
+      Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
     else
       nil
     end
@@ -1127,7 +1228,7 @@ class SpecID::Sequest::PepXML::Modifications
         :variable => 'N',
         :binary => 'Y',
       }
-      SpecID::Sequest::PepXML::AAModification.new(hash)
+      Sequest::PepXML::AAModification.new(hash)
     end
     ## Create the static_terminal_mods objects
@@ -1149,7 +1250,7 @@ class SpecID::Sequest::PepXML::Modifications
         :description => mod[0],
       }
       hash[:protein_terminus] = protein_terminus if protein_terminus
-      SpecID::Sequest::PepXML::TerminalModification.new(hash)
+      Sequest::PepXML::TerminalModification.new(hash)
     end
     #################################
     # Variable Mods:
@@ -1159,20 +1260,25 @@ class SpecID::Sequest::PepXML::Modifications
     variable_mods = []
     (0...arr.size).step(2) do |i|
       if arr[i].to_f != 0.0
-        variable_mods << [arr[i+1].to_sym, arr[i].to_f]
+        variable_mods << [arr[i+1], arr[i].to_f]
       end
     end
-    variable_mods.map! do |mod|
-      hash = {
-        :aminoacid => mod[0].to_s,
-        :massdiff => mod[1].to_plus_minus_string,
-        :mass => aa_hash[mod[0]] + mod[1],
-        :variable => 'Y',
-        :binary => 'N',
-        :symbol => @mod_symbols_hash[mod],
-      }
-      SpecID::Sequest::PepXML::AAModification.new(hash)
+    mod_objects = []
+    variable_mods.each do |mod|
+      mod[0].split('').each do |aa|
+        hash = {
+          :aminoacid => aa,
+          :massdiff => mod[1].to_plus_minus_string,
+          :mass => aa_hash[aa.to_sym] + mod[1],
+          :variable => 'Y',
+          :binary => 'N',
+          :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
+        }
+        mod_objects << Sequest::PepXML::AAModification.new(hash)
+      end
     end
+    variable_mods = mod_objects
     #################################
     # TERMINAL Variable Mods:
     #################################
@@ -1194,7 +1300,7 @@ class SpecID::Sequest::PepXML::Modifications
         :variable => 'Y',
         :symbol => symb,
       }
-      SpecID::Sequest::PepXML::TerminalModification.new(hash)
+      Sequest::PepXML::TerminalModification.new(hash)
     end
     #########################
@@ -1221,7 +1327,7 @@ end
 # Modified aminoacid, static or variable
 # unless otherwise stated, all attributes can be anything
-class SpecID::Sequest::PepXML::AAModification
+class Sequest::PepXML::AAModification
   include SpecIDXML
   # The amino acid (one letter code)
@@ -1256,7 +1362,7 @@ class SpecID::Sequest::PepXML::AAModification
 end
 # Modified aminoacid, static or variable
-class SpecID::Sequest::PepXML::TerminalModification
+class Sequest::PepXML::TerminalModification
   include SpecIDXML
   # n for N-terminus, c for C-terminus
@@ -1285,7 +1391,7 @@ class SpecID::Sequest::PepXML::TerminalModification
 end
-class SpecID::Sequest::PepXML::SearchDatabase
+class Sequest::PepXML::SearchDatabase
   include SpecIDXML
   attr_accessor :local_path
   attr_writer :seq_type
@@ -1316,7 +1422,7 @@ class SpecID::Sequest::PepXML::SearchDatabase
 end
-class SpecID::Sequest::PepXML::SpectrumQuery
+class Sequest::PepXML::SpectrumQuery
   include SpecIDXML
   # basename_noext.first_scan.last_scan.charge
@@ -1344,7 +1450,7 @@ class SpecID::Sequest::PepXML::SpectrumQuery
   # FOR PEPXML:
   ############################################################
   def to_pepxml
-    case SpecID::Sequest::PepXML.pepxml_version
+    case Sequest::PepXML.pepxml_version
     when 18
       element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
         @search_results.collect { |sr| sr.to_pepxml }.join
@@ -1412,102 +1518,35 @@ class SpecID::Sequest::PepXML::SpectrumQuery
 end
-# This object inherits from Array.  As such, it is very memory efficient
-# (compared to a normal object).  However, certain operations when used on
-# these objects will produce undesirable results: An array of these objects
-# will be flattened (becoming a long list of attributes) when 'flatten' is
-# called on them, which is not the behavior we want!  other odd behavior is
-# possible.  Possible fixes are to use a delegate class or redefine the way
-# this responds to flatten (so that it won't flatten).
-class SpecID::Sequest::PepXML::SearchHit < Array
+Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info) )
+# hit_rank=0 peptide=1 peptide_prev_aa=2 peptide_next_aa=3 protein=4 num_tot_proteins=5 num_matched_ions=6 tot_num_ions=7 calc_neutral_pep_mass=8 massdiff=9 num_tol_term=10 num_missed_cleavages=11 is_rejected=12 deltacnstar=13 xcorr=14 deltacn=15 spscore=16 sprank=17 modification_info=18
+class Sequest::PepXML::SearchHit
   include SpecIDXML
   Non_standard_amino_acid_char_re = /[^A-Z\.\-]/
-  # num_tot_proteins = "Number of unique proteins in search database containing peptide"
-  #attr_accessor 0:hit_rank, 1:peptide, 2:peptide_prev_aa, 3:peptide_next_aa, 4:protein, 5:num_tot_proteins, 6:num_matched_ions, 7:tot_num_ions, 8:calc_neutral_pep_mass, 9:massdiff, 10:num_tol_term, 11:num_missed_cleavages, 12:is_rejected
-  #attr_accessor 13:deltacnstar
-  #attr_accessor 14:xcorr, 15:deltacn, 16:spscore, 17:sprank
-  ind_keys = {} ; ind_keys_w_eq = {}; @@ind = {}
-  ind_keys = {:hit_rank => 0, :peptide => 1, :peptide_prev_aa => 2, :peptide_next_aa => 3, :protein => 4, :num_tot_proteins => 5, :num_matched_ions => 6, :tot_num_ions => 7, :calc_neutral_pep_mass => 8, :massdiff => 9, :num_tol_term => 10, :num_missed_cleavages => 11, :is_rejected => 12, :deltacnstar  => 13, :xcorr => 14, :deltacn => 15, :spscore => 16, :sprank => 17}
-  @@methods = ind_keys.keys
-  def hit_rank ; self[0] end ; def hit_rank=(oth) ; self[0] = oth end
-  def peptide ; self[1] end ; def peptide=(oth) ; self[1] = oth end
-  def peptide_prev_aa ; self[2] end ; def peptide_prev_aa=(oth) ; self[2] = oth end
-  def peptide_next_aa ; self[3] end ; def peptide_next_aa=(oth) ; self[3] = oth end
-  def protein ; self[4] end ; def protein=(oth) ; self[4] = oth end
-  def num_tot_proteins ; self[5] end ; def num_tot_proteins=(oth) ; self[5] = oth end
-  def num_matched_ions ; self[6] end ; def num_matched_ions=(oth) ; self[6] = oth end
-  def tot_num_ions ; self[7] end ; def tot_num_ions=(oth) ; self[7] = oth end
-  def calc_neutral_pep_mass ; self[8] end ; def calc_neutral_pep_mass=(oth) ; self[8] = oth end
-  def massdiff ; self[9] end ; def massdiff=(oth) ; self[9] = oth end
-  def num_tol_term ; self[10] end ; def num_tol_term=(oth) ; self[10] = oth end
-  def num_missed_cleavages ; self[11] end ; def num_missed_cleavages=(oth) ; self[11] = oth end
-  def is_rejected ; self[12] end ; def is_rejected=(oth) ; self[12] = oth end
-  def deltacnstar ; self[13] end ; def deltacnstar=(oth) ; self[13] = oth end
-  def xcorr ; self[14] end ; def xcorr=(oth) ; self[14] = oth end
-  def deltacn ; self[15] end ; def deltacn=(oth) ; self[15] = oth end
-  def spscore ; self[16] end ; def spscore=(oth) ; self[16] = oth end
-  def sprank ; self[17] end ; def sprank=(oth) ; self[17] = oth end
-  @@arr_size = ind_keys.size
-  ind_keys.each {|k,v| ind_keys_w_eq["#{k}=".to_sym] = v }
-  ind_keys.merge!(ind_keys_w_eq)
-  ind_keys.each {|k,v| @@ind[k] = v ; @@ind["#{k}"] = v}
   # These are all search_score elements:
   # 1 if there is no second ranked hit, 0 otherwise
+  tmp_verb = $VERBOSE
+  $VERBOSE = nil
   def initialize(hash=nil)
     super(@@arr_size)
-    self[0,18] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank]]
-    self
-    #if hash ; set_from_hash(hash) end
-  end
-  # remove_non_amino_acids && split_sequence
-  def self.prepare_sequence(val)
-    nv = remove_non_amino_acids(val)
-    split_sequence(nv)
-  end
-  # Returns prev, peptide, next from sequence.  Parse errors return
-  # nil,nil,nil
-  #   R.PEPTIDE.A  # -> R, PEPTIDE, A
-  #   R.PEPTIDE.-  # -> R, PEPTIDE, -
-  #   PEPTIDE.A    # -> -, PEPTIDE, A
-  #   A.PEPTIDE    # -> A, PEPTIDE, -
-  #   PEPTIDE      # -> nil,nil,nil
-  def self.split_sequence(val)
-    peptide_prev_aa = ""; peptide = ""; peptide_next_aa = ""
-    pieces = val.split('.')
-    case pieces.size
-    when 3
-      peptide_prev_aa, peptide, peptide_next_aa = *pieces
-    when 2
-      if pieces[0].size > 1  ## N termini
-        peptide_prev_aa, peptide, peptide_next_aa = '-', pieces[0], pieces[1]
-      else  ## C termini
-        peptide_prev_aa, peptide, peptide_next_aa = pieces[0], pieces[1], '-'
-      end
-    when 1  ## this must be a parse error!
-      peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
-    when 0
-      peptide_prev_aa, peptide, peptide_next_aa = nil,nil,nil
+    if hash
+      self[0,19] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info]]
     end
-    return peptide_prev_aa, peptide, peptide_next_aa
-  end
-  # removes nonstandard chars with Non_standard_amino_acid_char_re
-  # preserves A-Z and '.
-  def self.remove_non_amino_acids(sequence)
-    sequence.gsub(Non_standard_amino_acid_char_re, '')
+    self
   end
+  $VERBOSE = tmp_verb
   def inspect
-    var = @@methods.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
+    var = @@attributes.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
     "#<SearchHit #{var}>"
   end
@@ -1515,7 +1554,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
   def self.calc_num_missed_cleavages(params, sequence)
     num_missed = 0
     split_after, except_before = params.enzyme_specificity
-    first, middle, last = self.split_sequence(sequence)
+    first, middle, last = SpecID::Pep.split_sequence(sequence)
     arr = middle.scan(/[#{split_after}][^#{except_before}]/)
     return arr.size
   end
@@ -1524,7 +1563,7 @@ class SpecID::Sequest::PepXML::SearchHit < Array
   def self.calc_num_tol_term(params, sequence)
     num_tol = 0
     split_after, except_before = params.enzyme_specificity
-    first, middle, last = self.split_sequence(sequence)
+    first, middle, last = SpecID::Pep.split_sequence(sequence)
     last_of_middle = middle[-1,1]
     first_of_middle = middle[0,1]
     if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
@@ -1552,15 +1591,23 @@ class SpecID::Sequest::PepXML::SearchHit < Array
   end
   def to_pepxml
+    mod_pepxml =
+      if self[18]
+        self[18].to_pepxml
+      else
+        ''
+      end
     element_xml("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
-      search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
+      mod_pepxml +
+        search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
     end
   end
 end
 # Positions and masses of modifications
-class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
+class Sequest::PepXML::SearchHit::ModificationInfo
   include SpecIDXML
   ## Should be something like this:
@@ -1583,7 +1630,11 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
   attr_accessor :mod_aminoacid_mass_array
   def initialize(hash=nil)
-    instance_var_set_from_hash(hash)
+    @mod_nterm_mass = nil
+    @mod_cterm_mass = nil
+    if hash
+      instance_var_set_from_hash(hash)
+    end
   end
   # Will escape any xml special chars in modified_peptide
@@ -1621,3 +1672,4 @@ class SpecID::Sequest::PepXML::SearchHit::ModificationInfo
 end