RubyGems - ms-ident - Versions diffs - 0.0.2 - Mend

ms-ident 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

data/.document +5 -0
data/Gemfile +31 -0
data/Gemfile.lock +32 -0
data/LICENSE +61 -0
data/README.rdoc +97 -0
data/Rakefile +54 -0
data/VERSION +1 -0
data/lib/merge.rb +7 -0
data/lib/ms/ident/pepxml/modifications/sequest.rb +237 -0
data/lib/ms/ident/pepxml/modifications.rb +94 -0
data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
data/lib/ms/ident/pepxml/msms_run_summary.rb +81 -0
data/lib/ms/ident/pepxml/parameters.rb +14 -0
data/lib/ms/ident/pepxml/pep_summary.rb +104 -0
data/lib/ms/ident/pepxml/prot_summary.rb +484 -0
data/lib/ms/ident/pepxml/sample_enzyme.rb +166 -0
data/lib/ms/ident/pepxml/search_database.rb +42 -0
data/lib/ms/ident/pepxml/search_hit/modification_info.rb +82 -0
data/lib/ms/ident/pepxml/search_hit.rb +141 -0
data/lib/ms/ident/pepxml/search_result.rb +28 -0
data/lib/ms/ident/pepxml/search_summary.rb +88 -0
data/lib/ms/ident/pepxml/spectrum_query.rb +83 -0
data/lib/ms/ident/pepxml.rb +61 -0
data/lib/ms/ident.rb +11 -0
data/schema/pepXML_v115.xsd +1458 -0
data/schema/pepXML_v19.xsd +1337 -0
data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
data/spec/ms/ident/pepxml_spec.rb +436 -0
data/spec/spec_helper.rb +40 -0
metadata +194 -0

data/spec/ms/ident/pepxml/sample_enzyme_spec.rb ADDED Viewed

@@ -0,0 +1,181 @@
+require 'spec_helper'
+require 'ms/ident/pepxml/sample_enzyme'
+require 'nokogiri'
+describe 'creating an Ms::Ident::Pepxml::SampleEnzyme' do  # construction specs: from an enzyme name and from an attribute hash
+  before do
+    @hash = {  # expected attribute values, checked against the object in both specs below
+      :name => 'trypsin',
+      :cut => 'KR',
+      :no_cut => 'P',
+      :sense => 'C',
+    }
+  end
+  it 'can be set by a known enzyme name' do
+    se = Ms::Ident::Pepxml::SampleEnzyme.new('trypsin')  # only the name is given; the other attributes are expected to be filled in
+    @hash.each do |k,v|
+      se.send(k).is v  # each key doubles as the reader method name
+    end
+  end
+  it 'can be set manually with a hash' do
+    se = Ms::Ident::Pepxml::SampleEnzyme.new(@hash)  # every attribute is supplied explicitly
+    @hash.each do |k,v|
+      se.send(k).is v
+    end
+  end
+end
+describe 'an Ms::Ident::Pepxml::SampleEnzyme' do  # serialization specs for an already-built trypsin enzyme
+  before do
+    @sample_enzyme = Ms::Ident::Pepxml::SampleEnzyme.new(:name=>'trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
+  end
+  it 'generates a valid xml fragment' do
+    string = @sample_enzyme.to_xml  # called without a builder: a String is expected back
+    ok string.is_a?(String)
+    string.matches(/<sample_enzyme name="trypsin"/)
+    string.matches(/<specificity/)
+    %w(cut="KR" no_cut="P" sense="C").each {|re| string.matches(/#{re}/) }
+    ok !string.include?('version')  # presumably guards against an XML declaration (<?xml version=...) in the fragment -- TODO confirm
+  end
+  it 'adds to an xml builder object' do
+    builder = Nokogiri::XML::Builder.new
+    after = @sample_enzyme.to_xml(builder)  # called with a builder: the same builder is expected back
+    ok after.is_a?(Nokogiri::XML::Builder)
+    after.is builder
+    ok after.to_xml.is_a?(String)
+  end
+end
+xdescribe 'read in from an xml node' do
+  # placeholder: specs for reading a SampleEnzyme in from an xml node have not been written yet
+end
+### TODO: decide whether the functionality exercised by the disabled specs
+### below belongs in this class, or should live in ms-enzyme or ms-in_silico.
+=begin
+require 'set'
+describe 'Ms::Ident::Pepxml::SampleEnzyme digesting sequences' do
+  it 'can digest with no missed cleavages' do
+    st = "CRGATKKTAGRPMEK"
+    SampleEnzyme.tryptic(st).should == %w(CR GATK K TAGRPMEK)
+    st = "CATRP"
+    SampleEnzyme.tryptic(st).should == %w(CATRP)
+    st = "RCATRP"
+    SampleEnzyme.tryptic(st).should == %w(R CATRP)
+    st = ""
+    SampleEnzyme.tryptic(st).should == []
+    st = "R"
+    SampleEnzyme.tryptic(st).should == %w(R)
+  end
+  it 'can digest with missed cleavages' do
+    st = "CRGATKKTAGRPMEKLLLERTKY"
+    zero = %w(CR GATK K TAGRPMEK LLLER TK Y)
+    SampleEnzyme.tryptic(st,0).to_set.should == zero.to_set
+    one = %w(CRGATK GATKK KTAGRPMEK TAGRPMEKLLLER LLLERTK TKY)
+    SampleEnzyme.tryptic(st,1).to_set.should == (zero+one).to_set
+    two = %w(CRGATKK GATKKTAGRPMEK KTAGRPMEKLLLER TAGRPMEKLLLERTK LLLERTKY)
+    all = zero + one + two
+    SampleEnzyme.tryptic(st,2).to_set.should == all.to_set
+  end
+  it 'contains duplicates IF there are duplicate tryptic sequences' do
+    st = "AAAAKCCCCKDDDDKCCCCK"
+    peps = SampleEnzyme.new('trypsin').digest(st, 2)
+    peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
+  end
+end
+describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
+  before(:each) do
+    @full_KRP = SampleEnzyme.new do |se|
+      se.name = 'trypsin'
+      se.cut = 'KR'
+      se.no_cut = 'P'
+      se.sense = 'C'
+    end
+    @just_KR = SampleEnzyme.new do |se|
+      se.name = 'trypsin'
+      se.cut = 'KR'
+      se.no_cut = ''
+      se.sense = 'C'
+    end
+  end
+  it 'calculates the number of tolerant termini' do
+    exp = [{
+      # full KR/P
+      'K.EPTIDR.E' => 2,
+      'K.PEPTIDR.E' => 1,
+      'F.EEPTIDR.E' => 1,
+      'F.PEPTIDW.R' => 0,
+    },
+    {
+      # just KR
+      'K.EPTIDR.E' => 2,
+      'K.PEPTIDR.E' => 2,
+      'F.EEPTIDR.E' => 1,
+      'F.PEPTIDW.R' => 0,
+    }
+    ]
+    scall = Sequest::PepXML::SearchHit
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
+      hash.each do |seq, val|
+        sample_enzyme.num_tol_term(seq).should == val
+      end
+    end
+  end
+  it 'calculates number of missed cleavages' do
+    exp = [{
+    "EPTIDR" => 0,
+    "PEPTIDR" => 0,
+    "EEPTIDR" => 0,
+    "PEPTIDW" => 0,
+    "PERPTIDW" => 0,
+    "PEPKPTIDW" => 0,
+    "PEPKTIDW" => 1,
+    "RTTIDR" => 1,
+    "RTTIKK" => 2,
+    "PKEPRTIDW" => 2,
+    "PKEPRTIDKP" => 2,
+    "PKEPRAALKPEERPTIDKW" => 3,
+    },
+    {
+    "EPTIDR" => 0,
+    "PEPTIDR" => 0,
+    "EEPTIDR" => 0,
+    "PEPTIDW" => 0,
+    "PERPTIDW" => 1,
+    "PEPKPTIDW" => 1,
+    "PEPKTIDW" => 1,
+    "RTTIDR" => 1,
+    "RTTIKK" => 2,
+    "PKEPRTIDW" => 2,
+    "PKEPRTIDKP" => 3,
+    "PKEPRAALKPEERPTIDKW" => 5,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
+      hash.each do |aaseq, val|
+        #first, middle, last = SpecID::Pep.split_sequence(seq)
+        # note that we are only using the middle section!
+        sample_enzyme.num_missed_cleavages(aaseq).should == val
+      end
+    end
+  end
+end
+=end

data/spec/ms/ident/pepxml_spec.rb ADDED Viewed

@@ -0,0 +1,436 @@
+require 'spec_helper'
+require 'ms/mass'
+require 'ms/mass/aa'
+require 'ms/ident/pepxml'
+require 'ms/ident/pepxml/modifications'
+require 'ms/ident/pepxml/spectrum_query'
+require 'ms/ident/pepxml/search_result'
+require 'ms/ident/pepxml/search_hit'
+require 'ms/ident/pepxml/search_hit/modification_info'
+describe "creating an Ms::Ident::Pepxml" do
+  extend Ms::Ident
+  it "can be creating in a nested fashion reflecting internal structure" do
+    pepxml = Pepxml.new do |msms_pipeline_analysis|
+      msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
+        # prep the sample enzyme and search_summary
+        msms_run_summary.merge!(
+          :base_name => '/home/jtprince/dev/mspire/020',
+          :ms_manufacturer => 'Thermo',
+          :ms_model => 'LTQ Orbitrap',
+          :ms_ionization => 'ESI',
+          :ms_mass_analyzer => 'Ion Trap',
+          :ms_detector => 'UNKNOWN'
+        ) do |sample_enzyme, search_summary, spectrum_queries|
+          sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
+          search_summary.merge!(
+            :base_name=>'/path/to/file/020',
+            :search_engine => 'SEQUEST',
+            :precursor_mass_type =>'monoisotopic',
+            :fragment_mass_type => 'average'
+          ) do |search_database, enzymatic_search_constraint, modifications, parameters|
+            search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
+            enzymatic_search_constraint.merge!(
+              :enzyme => 'Trypsin',
+              :max_num_internal_cleavages => 2,
+              :min_number_termini => 2
+            )
+            modifications << Pepxml::AminoacidModification.new(
+              :aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
+              :variable => 'Y', :symbol => '*')
+              # invented, for example, a protein terminating mod
+            modifications << Pepxml::TerminalModification.new(
+              :terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333,
+              :variable => 'Y', :symbol => '[', :protein_terminus => 'c',
+              :description => 'leave protein_terminus off if not protein mod'
+            )
+            modifications << Pepxml::TerminalModification.new(
+              :terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322,
+              :variable => 'N', :symbol => ']', :description => 'example: c term mod'
+            )
+            parameters.merge!(
+                              :fragment_ion_tolerance => 1.0000,
+                              :digest_mass_range => '600.0 3500.0',
+                              :enzyme_info => 'Trypsin(KR/P) 1 1 KR P', # etc....
+                             )
+          end
+          spectrum_query1 = Pepxml::SpectrumQuery.new(
+            :spectrum  => '020.3.3.1', :start_scan => 3, :end_scan => 3,
+            :precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
+          ) do |search_results|
+            search_result1 = Pepxml::SearchResult.new do |search_hits|
+              modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
+                Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
+              end
+              # order(modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
+              # or can be set by hash
+              mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
+              search_hit1 = Pepxml::SearchHit.new(
+                :hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
+                :protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
+                :tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
+                :num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
+                :modification_info => mod_info) do |search_scores|
+                  search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
+                                     :spscore => 29.85, :sprank => 1)
+                end
+              search_hits << search_hit1
+            end
+            search_results << search_result1
+          end
+          spectrum_queries << spectrum_query1
+        end
+      end
+    end
+    puts pepxml.to_xml
+    pepxml.to_xml.matches /<msms_pipeline_analysis /
+  end
+end
+=begin
+    # splits string on ' ' and checks that each line in lines matching
+    # find_line_regexp contains every one of the resulting pieces
+    def match_modline_pieces(lines, find_line_regexp, string)
+      pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
+      lines.each do |line|
+        if line =~ find_line_regexp
+          pieces.each do |piece|
+            line.should =~ piece
+          end
+        end
+      end
+    end
+    it 'gets modifications right in real run' do
+      @out_files.each do |fn|
+        fn.exist_as_a_file?.should be_true
+        beginning = IO.read(fn)
+        lines = beginning.split("\n")
+        [
+          [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
+          [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
+          [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
+          [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
+          [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
+        ].each do |a,b|
+          match_modline_pieces(lines, a, b)
+        end
+        [
+        '<modification_info modified_peptide="Y#RLGGS#T#K">',
+        '<mod_aminoacid_mass position="1" mass="243.1559"/>',
+        '<mod_aminoacid_mass position="7" mass="167.0581"/>',
+        '</modification_info>',
+        '<mod_aminoacid_mass position="9" mass="181.085"/>'
+        ].each do |line|
+          beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
+        end
+      end
+    end
+  end
+end
+=begin
+describe "Ms::Ident::Pepxml created from small bioworks.xml" do
+  spec_large do
+    before(:all) do
+      tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
+      tf_params = Tfiles + "/bioworks32.params"
+      tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
+      out_path = Tfiles
+      @pepxml_objs = Sequest::Pepxml.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
+    end
+    it 'gets some spectrum queries' do
+      @pepxml_objs.each do |obj|
+        (obj.spectrum_queries.size > 2).should be_true
+        (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
+      end
+      #@pepxml_objs.each do |pep| puts pep.to_pepxml end
+    end
+  end
+end
+describe Sequest::Pepxml, " created from large bioworks.xml" do
+  # asserts obj.send(method) matches expected for each [expected, method] pair in arrs (Floats are compared within a 1e-10 tolerance)
+  def assert_equal_pairs(obj, arrs)
+    arrs.each do |arr|
+      #if obj.send(arr[1]) != arr[0]
+      #  puts "HELLO"
+      #  puts "OBJ answer"
+      #  p obj.send(arr[1])
+      #  puts "ar0"
+      #  p arr[0]
+      #  puts "ar1"
+      #  p arr[1]
+      #end
+      if arr[0].is_a? Float
+        obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
+      else
+        obj.send(arr[1]).should == arr[0]
+      end
+    end
+  end
+  # swaps the first two elements of each pair in place, then delegates to assert_equal_pairs
+  def assert_equal_pairs_swapped(obj, arrs)
+    arrs.each do |arr|
+      arr[0], arr[1] = arr[1], arr[0]
+    end
+    assert_equal_pairs(obj, arrs)
+  end
+  spec_large do
+    before(:all) do
+      st = Time.new
+      params = Tfiles + "/opd1/sequest.3.2.params"
+      bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
+      mzxml_path = Tfiles_l + "/opd1"
+      out_path = Tfiles
+      @pepxml_version = 18
+      @pepxml_objs = Sequest::Pepxml.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
+      puts "- takes #{Time.new - st} secs"
+    end
+    it 'extracts MSMSPipelineAnalysis' do
+      ######## HMMMMM...
+      Sequest::Pepxml.pepxml_version.should == @pepxml_version
+      # MSMSPipelineAnalysis
+      po = @pepxml_objs.first
+      msms_pipeline = po.msms_pipeline_analysis
+      msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
+      msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
+      msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
+      msms_pipeline.summary_xml.should == '000.xml'
+    end
+    it 'extracts MSmSRunSummary' do
+      # MSMSRunSummary
+      rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
+      rs.base_name.should =~ /\/000/
+      assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
+    end
+    it 'extracts SampleEnzyme' do
+      # SampleEnzyme
+      se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
+      assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
+    end
+    it 'extracts SearchSummary' do
+      # SearchSummary
+      ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
+      ss.is_a?(Sequest::Pepxml::SearchSummary).should be_true
+      ss.base_name.should =~ /\/000/
+      ss.peptide_mass_tol.should =~ /1\.500/
+      assert_equal_pairs_swapped(ss, [ # normal attributes
+                                 [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
+                                 # enzymatic_search_constraint
+                                 [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
+                                 # parameters
+                                 [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
+      ])
+    end
+    it 'extracts SearchDatabase' do
+      # SearchDatabase
+      sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
+      sd.is_a?(Sequest::Pepxml::SearchDatabase).should be_true
+      assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
+    end
+    it 'returns SpectrumQueries' do
+      # SpectrumQueries
+      sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
+      spec = sq.first
+      assert_equal_pairs_swapped(spec, [
+                                 [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
+                                 #[:precursor_neutral_mass, "1074.5920"], # out2summary
+                                 [:precursor_neutral_mass, 1074.666926], # mine
+                                 [:assumed_charge, 1], [:index, "1"],
+      ])
+      sh = spec.search_results.first.search_hits.first
+      assert_equal_pairs_swapped(sh, [
+                                 # normal attributes
+                                 [:hit_rank, 1],
+                                 [:peptide, "SIYFRNFK"],
+                                 [:peptide_prev_aa, "R"],
+                                 [:peptide_next_aa, "G"],
+                                 [:protein, "gi|16130084|ref|NP_416651.1|"],
+                                 [:num_tot_proteins, 1],
+                                 [:num_matched_ions, 4],
+                                 [:tot_num_ions, 14],
+                                 #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
+                                 [:calc_neutral_pep_mass, 1074.23261], # mine
+                                 #[:massdiff, "+0.400000"], # out2summary
+                                 [:massdiff, 0.434316000000081],  # mine
+                                 [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
+                                 # search_score
+                                 [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
+      ])
+      spec = sq[1]
+      assert_equal_pairs_swapped(spec, [
+                                 [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
+                                 [:precursor_neutral_mass, 663.206111], # mine
+                                 [:assumed_charge, 1], [:index, "2"],
+      ])
+      sh = spec.search_results.first.search_hits.first
+      assert_equal_pairs_swapped(sh, [
+                                 # normal attributes
+                                 [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
+                                 [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
+                                 #[:massdiff, "-0.600000"], # out2summary
+                                 [:massdiff, -0.556499000000031],  # mine
+                                 #[:calc_neutral_pep_mass, 663.7920], # out2summary
+                                 [:calc_neutral_pep_mass, 663.76261], # mine
+                                 # search_score
+                                 [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
+      ])
+      spec = sq[9]
+      assert_equal_pairs_swapped(spec, [
+                                 [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
+                                 #[:precursor_neutral_mass, "691.0920"], # out2summary
+                                 [:precursor_neutral_mass, 691.150992], # mine
+      ])
+      sh = spec.search_results.first.search_hits.first
+      assert_equal_pairs_swapped(sh, [
+                                 # normal attributes
+                                 [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
+                                 #[:num_missed_cleavages, "0"],  # out2summary misses this!
+                                 [:num_missed_cleavages, 1],
+                                 [:is_rejected, 0],
+                                 #[:calc_neutral_pep_mass, "691.7920"], # out2summary
+                                 [:calc_neutral_pep_mass, 691.82261], # mine
+                                 #[:massdiff, "-0.700000"], # out2summary
+                                 [:massdiff, -0.67161800000008],  # mine
+                                 # search_score
+                                 [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
+      ])
+    end
+    it 'can generate correct pepxml file' do
+      ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
+      string = @pepxml_objs.first.to_pepxml
+      ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
+      base_name_re = /base_name=".*?files\//o
+      date_re = /date=".*?"/
+      string.split("\n").each_with_index do |line,i|
+        if i > 99 ; break end
+        ans, exp =
+          if i == 1
+            [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
+          elsif i == 2
+            [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
+          elsif i == 6
+            [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
+          else
+            [line, ans_lines[i]]
+          end
+        #ans.split('').zip(exp.split('')) do |l,a|
+        #  if l != a
+        #    puts line
+        #    puts ans_lines[i]
+        #    puts l
+        #    puts a
+        #  end
+        #end
+        if ans != exp
+          puts ans
+          puts exp
+        end
+        ans.should == exp
+        #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
+      end
+    end
+  end
+end
+describe Sequest::Pepxml::Modifications do
+  before(:each) do
+    tf_params = Tfiles + "/bioworks32.params"
+    @params = Sequest::Params.new(tf_params)
+    # The params object here is completely unnecessary for this test, except
+    # that it sets up the mass table
+    @obj = Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
+  end
+  it 'creates a mod_symbols_hash' do
+    answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
+    @obj.mod_symbols_hash.should == answ
+    ## need more here
+  end
+  it 'creates a ModificationInfo object given a special peptide sequence' do
+    mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
+    @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
+    @params.term_diff_search_options = "14.20000 12.33000"
+    mod = Sequest::Pepxml::Modifications.new(@params, mod_string)
+    ## no mods
+    peptide = "PEPTIDE"
+    mod.modification_info(peptide).should be_nil
+    peptide = "]M*EC^S@IDM#M*EMSCM["
+    modinfo = mod.modification_info(peptide)
+    modinfo.modified_peptide.should == peptide
+    modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
+    modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
+  end
+end
+describe Sequest::Pepxml::SearchHit::ModificationInfo do
+  before(:each) do
+    modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
+      Sequest::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
+    end
+    hash = {
+      :mod_nterm_mass => 520.2,
+      :modified_peptide => "MOD*IFI^E&D",
+      :mod_aminoacid_masses => modaaobjs,
+    }
+    #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
+    @obj = Sequest::Pepxml::SearchHit::ModificationInfo.new(hash)
+  end
+  def _re(st)
+    /#{Regexp.escape(st)}/
+  end
+  it 'can produce pepxml' do
+    answ = @obj.to_pepxml
+    answ.should =~ _re('<modification_info')
+    answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
+    answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
+    answ.should =~ _re("<mod_aminoacid_mass")
+    answ.should =~ _re(" position=\"3\"")
+    answ.should =~ _re(" mass=\"150.3\"")
+    answ.should =~ _re(" position=\"6\"")
+    answ.should =~ _re(" mass=\"345.2\"")
+    answ.should =~ _re("</modification_info>")
+  end
+end
+=end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,40 @@
+require 'rubygems'
+require 'bundler'
+$spec_large = ENV['SPEC_LARGE']  # any value set for SPEC_LARGE (even "false" or "") is a truthy String and enables the large specs
+development = $spec_large ? :development_large : :development  # Bundler group to set up alongside :default
+begin
+  Bundler.setup(:default, development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code  # abort the spec run with the status code carried by the Bundler error
+end
+require 'spec/more'
+load_testdata = lambda do  # deferred so that ms/testdata is only required when large specs are enabled
+  require 'ms/testdata'
+  SEQUEST_DIR = Ms::TESTDATA + '/sequest'
+end
+load_testdata.call if $spec_large
+$LOAD_PATH.unshift(File.dirname(__FILE__))  # make the spec directory itself requirable
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))  # make the sibling lib directory requirable
+Bacon.summary_on_exit
+def spec_large(&block)
+  if $spec_large
+    block.call
+  else
+    # Requires SPEC_LARGE=true and tfiles_large dir for testing large test files
+    it 'SKIPPING (not testing large files)' do
+    end
+  end
+end
+TESTFILES = File.dirname(__FILE__) + '/tfiles'  # path of the 'tfiles' directory that sits next to this helper file