ms-sequest 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ require 'ms/sequest/params'
17
17
  module Ms ; end
18
18
  module Ms::Sequest ; end
19
19
 
20
+
20
21
  class Ms::Sequest::Srf
21
22
 
22
23
  class NoSequestParamsError < ArgumentError
@@ -350,8 +351,8 @@ class Ms::Sequest::Srf::Header
350
351
  :modifications => 456,
351
352
  }
352
353
 
353
- # a Ms::Sequest::Srf::DTAGen object
354
354
  attr_accessor :version
355
+ # a Ms::Sequest::Srf::DTAGen object
355
356
  attr_accessor :dta_gen
356
357
  attr_accessor :enzyme
357
358
  attr_accessor :ion_series
@@ -732,8 +733,8 @@ end
732
733
  class Ms::Sequest::SrfGroup
733
734
  include Ms::Id::SearchGroup
734
735
 
735
- # inherets an array of Ms::Sequest::Srf::Out::Pep objects
736
- # inherets an array of Ms::Sequest::Srf::Out::Prot objects
736
+ # inherits an array of Ms::Sequest::Srf::Out::Pep objects
737
+ # inherits an array of Ms::Sequest::Srf::Out::Prot objects
737
738
 
738
739
  # see Ms::Id::Search for acceptable arguments
739
740
  # (filename, filenames, array of objects)
@@ -44,7 +44,7 @@ module Ms
44
44
  # :zip requires gem rubyzip to be installed and is *very* bloated
45
45
  # as it writes out all the files first!
46
46
  # :tgz requires gem archive-tar-minitar to be installed
47
- def to_dta_files(out_folder=nil, compress=nil)
47
+ def to_dta(out_folder=nil, compress=nil)
48
48
  outdir =
49
49
  if out_folder ; out_folder
50
50
  else base_name
@@ -27,7 +27,7 @@ module Ms
27
27
  when 'mgf'
28
28
  srf.to_mgf(newfile)
29
29
  when 'dta'
30
- srf.to_dta_files(newfile)
30
+ srf.to_dta(newfile)
31
31
  end
32
32
  end
33
33
  end
@@ -0,0 +1,29 @@
1
#!/usr/bin/ruby
#
# Converts IPI-formatted fasta header lines into an NCBI style format.
# For every <file>.fasta given on the command line a sibling
# <file>_NCBI.fasta is written; lines that are not headers are copied
# through unchanged.

if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <file>.fasta ..."
  puts "outputs: <file>_NCBI.fasta ..."
  puts ""
  puts "(Bioworks 3.3.1 [maybe others] does not seem to read an IPI"
  puts "formatted fasta database header lines. This will change an"
  puts "IPI format to an NCBI style format that Bioworks can read.)"
  exit
end

# Rewrites a single fasta header line (a line starting with '>').
#
# The first whitespace-delimited token after '>' is split on '|' into codes,
# and each code is split on ':' into a key and a value that are emitted as
# "key|value|".  The remaining tokens are re-joined with single spaces and
# appended as the description.
#
# A header with no token after '>' has no code section to convert; it is
# returned without its line ending instead of raising on the missing codes.
# NOTE: a code that contains no ':' still yields an empty value ("key||").
#
# Returns the rewritten header without a trailing newline.
def ncbi_style_header(line)
  codes, *description = line[1..-1].split(" ")
  return line.chomp if codes.nil?

  code_section = codes.split('|').map do |code|
    key, val = code.split(':')
    "#{key}|#{val}|"
  end.join
  ">#{code_section} #{description.join(" ")}"
end

ARGV.each do |file|
  # swap the input's extension for the _NCBI.fasta suffix
  base = file.chomp(File.extname(file))
  outfile = base + '_NCBI' + ".fasta"
  File.open(outfile, 'w') do |out|
    IO.foreach(file) do |line|
      if line =~ /^>/
        out.puts ncbi_style_header(line)
      else
        out.print line
      end
    end
  end
end
29
+
@@ -0,0 +1,153 @@
1
+
2
+ # TODO work on this guy!
3
+ =begin
4
+
5
+ require File.expand_path( File.dirname(__FILE__) + '/../tap_spec_helper' )
6
+
7
+ require 'spec_id'
8
+ require 'spec_id/bioworks'
9
+ #require 'benchmark'
10
+
11
+ describe Bioworks, 'set from an xml file' do
12
+ # NEED TO DEBUG THIS PROB!
13
+ it 'can set one with labeled proteins' do
14
+ file = Tfiles + "/bioworks_with_INV_small.xml"
15
+ obj = Bioworks.new(file)
16
+ obj.prots.size.should == 19
17
+ file = Tfiles + '/bioworks_small.xml'
18
+ obj = Bioworks.new(file)
19
+ obj.prots.size.should == 106
20
+ end
21
+
22
+ it 'can parse an xml file NOT derived from multi-consensus' do
23
+ tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
24
+ obj = Bioworks.new(tf_bioworks_single_xml_small)
25
+ gfn = '5prot_mix_michrom_20fmol_200pmol'
26
+ origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
27
+ origfilepath = 'C:\Xcalibur\sequest'
28
+ obj.global_filename.should == gfn
29
+ obj.origfilename.should == origfilename
30
+ obj.origfilepath.should == origfilepath
31
+ obj.prots.size.should == 7
32
+ obj.prots.first.peps.first.base_name.should == gfn
33
+ obj.prots.first.peps.first.file.should == "152"
34
+ obj.prots.first.peps.first.charge.should == 2
35
+ # @TODO: add more tests here
36
+ end
37
+
38
+ it 'can output in excel format (**semi-verified right now)' do
39
+ tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
40
+ tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
41
+ tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
42
+ bio = Bioworks.new(tf_bioworks_to_excel)
43
+ bio.to_excel(tmpfile)
44
+ tmpfile.exist_as_a_file?.should be_true
45
+ #File.should exist_as_a_file(tmpfile)
46
+ exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
47
+ act = _arr_of_arrs(tmpfile)
48
+ exp.each_index do |i|
49
+ break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
50
+ _assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
51
+ end
52
+
53
+ File.unlink tmpfile
54
+ end
55
+
56
+ # prot is truthy when exp/act come from a protein line (rather than a peptide line)
57
+ def _assert_equal_pieces(exp, act, prot)
58
+ # equal as floats (by delta)
59
+ exp.each_index do |i|
60
+ if i == 5 # both prots and peps
61
+ act[i].to_f.should be_close(exp[i].to_f, 0.1)
62
+ elsif i == 3 && !prot
63
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
64
+ elsif i == 6 && !prot
65
+ act[i].to_f.should be_close(exp[i].to_f, 0.01)
66
+ elsif i == 9 && prot
67
+ ## NEED TO GET THESE BACK (for consistency):
68
+ #act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
69
+ else
70
+ ## NEED TO GET THESE BACK (for consistency):
71
+ #act[i].should == exp[i]
72
+ end
73
+ end
74
+ end
75
+
76
+ # takes a bioworks excel (in txt format) and outputs an arr of arrs
77
+ def _arr_of_arrs(file)
78
+ IO.readlines(file).collect do |line|
79
+ line.chomp!
80
+ line.split("\t")
81
+ end
82
+ end
83
+
84
+ it 'can return unique peptides and proteins by sequence+charge (private)' do
85
+ cnt = 0
86
+ answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
87
+ exp_peps = answer.collect! do |arr|
88
+ pep = Bioworks::Pep.new
89
+ pep.charge = arr[0]
90
+ pep.sequence = arr[1]
91
+ pep
92
+ end
93
+ exp_prots = [[0,2],[1,4,5],[3],[6]].collect do |arr|
94
+ arr.collect do |num|
95
+ prot = Bioworks::Prot.new
96
+ prot.reference = "#{num}"
97
+ prot
98
+ end
99
+ end
100
+ exp_peps = exp_peps.zip(exp_prots)
101
+ exp_peps.collect! do |both|
102
+ both[0].prots = [both[1]]
103
+ both[0]
104
+ end
105
+
106
+ peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
107
+ pep = Bioworks::Pep.new
108
+ pep.charge = arr[0]
109
+ pep.sequence = arr[1]
110
+ pep.prots = [Bioworks::Prot.new]
111
+ pep.prots.first.reference = "#{cnt}"
112
+ cnt += 1
113
+ pep
114
+ end
115
+ peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
116
+ proteins.size.should == peptides.size
117
+ exp_peps.each_with_index do |pep, i|
118
+ peptides[i].charge.should == pep.charge
119
+ peptides[i].sequence.should == pep.sequence
120
+ end
121
+
122
+ exp_prots.each_index do |i|
123
+ exp_prots[i].each_index do |j|
124
+ proteins[i][j].reference.should == exp_prots[i][j].reference
125
+ end
126
+ end
127
+ end
128
+
129
+ end
130
+
131
+ describe Bioworks::Pep do
132
+ it 'can be initialized from a hash' do
133
+ hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
134
+ pep = Bioworks::Pep.new(hash)
135
+ hash.each do |k,v|
136
+ pep.send(k).should == v
137
+ end
138
+ end
139
+
140
+ it 'correctly extracts file information' do
141
+ pep = Bioworks::Pep.new
142
+ testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
143
+ answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
144
+ testing.zip(answers) do |ar|
145
+ ans = pep.class.extract_file_info(ar[0])
146
+ ans.join(" ").should == ar[1].join(" ")
147
+ end
148
+ end
149
+
150
+ end
151
+
152
+
153
+ =end
@@ -0,0 +1,131 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
2
+
3
+ require 'ms/sequest/params'
4
+
5
# Reads +filename+ and returns a hash of every "name = value" parameter found.
# Only lines that begin with a lowercase letter are considered; anything from
# a ';' onward is dropped and trailing whitespace is stripped from the value.
def simple_parse(filename)
  pair_re = /([^\s]+)\s*=\s*([^;]+)\s*;?/
  candidates = IO.read(filename).split(/\r?\n/).select { |text| text =~ /^[a-z]/ }
  candidates.each_with_object({}) do |text, params|
    found = pair_re.match(text)
    next unless found
    params[found[1].dup] = found[2].rstrip
  end
end
15
+
16
# Shared examples for a sequest params file.  They read @file, @api_hash and
# @backwards_hash, which the including context is expected to have set.
shared 'sequest params' do
  before { @obj = Ms::Sequest::Params.new(@file) }

  it 'has a method for every parameter in the file' do
    simple_parse(@file).each_pair do |name, expected|
      @obj.send(name.to_sym).is expected
    end
  end

  it 'returns zero length string for params with no information' do
    @obj.second_database_name.is ''
    @obj.sequence_header_filter.is ''
  end

  it 'returns nil for params that do not exist and have no translation' do
    @obj.google_plex.is nil
  end

  it 'provides consistent API between versions for important info' do
    # run inside capture_stderr; the captured text itself is not inspected
    capture_stderr do
      @api_hash.each_pair do |reader, expected|
        @obj.send(reader).is expected
      end
    end
  end

  it 'provides some backwards compatibility' do
    @backwards_hash.each_pair do |reader, expected|
      @obj.send(reader).is expected
    end
  end
end
52
+
53
# Expected values for the bioworks31.params test file.
describe 'sequest params v 3.1' do
  @file = TESTFILES + '/bioworks31.params'

  # checked by the shared "consistent API between versions" example
  @api_hash = {
    :version             => '3.1',
    :enzyme              => 'Trypsin',
    :database            => 'C:\Xcalibur\database\ecoli_K12.fasta',
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => 'average',
    :fragment_mass_type  => 'average',
    :min_number_termini  => '1'
  }

  # checked by the shared "backwards compatibility" example
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '0.0000'
  }

  behaves_like 'sequest params'
end
73
+
74
# Expected values for the bioworks32.params test file.
describe 'sequest params v 3.2' do
  @file = TESTFILES + '/bioworks32.params'

  # checked by the shared "consistent API between versions" example
  @api_hash = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => 'C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta',
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => 'average',
    :fragment_mass_type  => 'average',
    :min_number_termini  => '2'
  }

  # checked by the shared "backwards compatibility" example
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000'
  }

  behaves_like 'sequest params'
end
93
+
94
# Expected values for the bioworks33.params test file.
describe 'sequest params v 3.3' do
  @file = TESTFILES + '/bioworks33.params'

  # checked by the shared "consistent API between versions" example
  @api_hash = {
    :version             => '3.3',
    :enzyme              => 'Trypsin',
    :database            => 'C:\Xcalibur\database\yeast.fasta',
    :enzyme_specificity  => [1, 'KR', ''],
    :precursor_mass_type => 'monoisotopic',
    :fragment_mass_type  => 'monoisotopic',
    :min_number_termini  => '2'
  }

  # checked by the shared "backwards compatibility" example
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000'
  }

  behaves_like 'sequest params'
end
112
+
113
# Expected values for the params fragment stored in the 7MIX srf test file.
describe 'sequest params v 3.2 from srf' do
  @file = TESTFILES + '/7MIX_STD_110802_1.sequest_params_fragment.srf'

  # checked by the shared "consistent API between versions" example
  @api_hash = {
    :version             => '3.2',
    :enzyme              => 'Trypsin',
    :database            => 'C:\Xcalibur\database\mixed_db_human_ecoli_7prot_unique.fasta',
    :enzyme_specificity  => [1, 'KR', 'P'],
    :precursor_mass_type => 'average',
    :fragment_mass_type  => 'average',
    :min_number_termini  => '2'
  }

  # checked by the shared "backwards compatibility" example
  @backwards_hash = {
    :max_num_internal_cleavages => '2',
    :fragment_ion_tol           => '1.0000'
  }

  behaves_like 'sequest params'
end
131
+
@@ -0,0 +1,376 @@
1
+
2
+ =begin
3
+ require File.expand_path( File.dirname(__FILE__) + '/../../spec_helper' )
4
+
5
+ require 'spec_id'
6
+ require 'spec_id/sequest/pepxml'
7
+ #require 'ms/mzxml'
8
+
9
+
10
+ NODELETE = false
11
+
12
+ describe Sequest::PepXML, " created from small bioworks.xml" do
13
+
14
+ spec_large do
15
+ before(:all) do
16
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
17
+
18
+ tf_params = Tfiles + "/bioworks32.params"
19
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
20
+ out_path = Tfiles
21
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
22
+ end
23
+
24
+ it 'gets some spectrum queries' do
25
+ @pepxml_objs.each do |obj|
26
+ (obj.spectrum_queries.size > 2).should be_true
27
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
28
+ end
29
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+
36
+ describe Sequest::PepXML, " created from large bioworks.xml" do
37
+ # assert_equal_by_pairs (really any old array)
38
+ def assert_equal_pairs(obj, arrs)
39
+ arrs.each do |arr|
40
+ #if obj.send(arr[1]) != arr[0]
41
+ # puts "HELLO"
42
+ # puts "OBJ answer"
43
+ # p obj.send(arr[1])
44
+ # puts "ar0"
45
+ # p arr[0]
46
+ # puts "ar1"
47
+ # p arr[1]
48
+ #end
49
+ if arr[0].is_a? Float
50
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
51
+ else
52
+ obj.send(arr[1]).should == arr[0]
53
+ end
54
+ end
55
+ end
56
+
57
+ # swap the first two elements of each pair before comparing
58
+ def assert_equal_pairs_swapped(obj, arrs)
59
+ arrs.each do |arr|
60
+ arr[0], arr[1] = arr[1], arr[0]
61
+ end
62
+ assert_equal_pairs(obj, arrs)
63
+ end
64
+
65
+ spec_large do
66
+ before(:all) do
67
+ st = Time.new
68
+ params = Tfiles + "/opd1/sequest.3.2.params"
69
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
70
+ mzxml_path = Tfiles_l + "/opd1"
71
+ out_path = Tfiles
72
+ @pepxml_version = 18
73
+ @pepxml_objs = Sequest::PepXML.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
74
+ puts "- takes #{Time.new - st} secs"
75
+ end
76
+
77
+ it 'extracts MSMSPipelineAnalysis' do
78
+ ######## HMMMMM...
79
+ Sequest::PepXML.pepxml_version.should == @pepxml_version
80
+
81
+ # MSMSPipelineAnalysis
82
+ po = @pepxml_objs.first
83
+ msms_pipeline = po.msms_pipeline_analysis
84
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
85
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
86
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
87
+ msms_pipeline.summary_xml.should == '000.xml'
88
+ end
89
+
90
+ it 'extracts MSMSRunSummary' do
91
+ # MSMSRunSummary
92
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
93
+ rs.base_name.should =~ /\/000/
94
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
95
+ end
96
+
97
+ it 'extracts SampleEnzyme' do
98
+ # SampleEnzyme
99
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
100
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
101
+ end
102
+
103
+ it 'extracts SearchSummary' do
104
+ # SearchSummary
105
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
106
+ ss.is_a?(Sequest::PepXML::SearchSummary).should be_true
107
+ ss.base_name.should =~ /\/000/
108
+ ss.peptide_mass_tol.should =~ /1\.500/
109
+ assert_equal_pairs_swapped(ss, [ # normal attributes
110
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
111
+
112
+ # enzymatic_search_constraint
113
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
114
+
115
+ # parameters
116
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
117
+ ])
118
+
119
+ end
120
+ it 'extracts SearchDatabase' do
121
+ # SearchDatabase
122
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
123
+ sd.is_a?(Sequest::PepXML::SearchDatabase).should be_true
124
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
125
+ end
126
+
127
+ it 'returns SpectrumQueries' do
128
+ # SpectrumQueries
129
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
130
+ spec = sq.first
131
+ assert_equal_pairs_swapped(spec, [
132
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
133
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
134
+ [:precursor_neutral_mass, 1074.666926], # mine
135
+ [:assumed_charge, 1], [:index, "1"],
136
+ ])
137
+ sh = spec.search_results.first.search_hits.first
138
+ assert_equal_pairs_swapped(sh, [
139
+ # normal attributes
140
+ [:hit_rank, 1],
141
+ [:peptide, "SIYFRNFK"],
142
+ [:peptide_prev_aa, "R"],
143
+ [:peptide_next_aa, "G"],
144
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
145
+ [:num_tot_proteins, 1],
146
+ [:num_matched_ions, 4],
147
+ [:tot_num_ions, 14],
148
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
149
+ [:calc_neutral_pep_mass, 1074.23261], # mine
150
+ #[:massdiff, "+0.400000"], # out2summary
151
+ [:massdiff, 0.434316000000081], # mine
152
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
153
+
154
+ # search_score
155
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
156
+ ])
157
+
158
+ spec = sq[1]
159
+ assert_equal_pairs_swapped(spec, [
160
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
161
+ [:precursor_neutral_mass, 663.206111], # mine
162
+ [:assumed_charge, 1], [:index, "2"],
163
+ ])
164
+
165
+ sh = spec.search_results.first.search_hits.first
166
+ assert_equal_pairs_swapped(sh, [
167
+ # normal attributes
168
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
169
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
170
+ #[:massdiff, "-0.600000"], # out2summary
171
+ [:massdiff, -0.556499000000031], # mine
172
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
173
+ [:calc_neutral_pep_mass, 663.76261], # mine
174
+
175
+ # search_score
176
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
177
+ ])
178
+
179
+ spec = sq[9]
180
+ assert_equal_pairs_swapped(spec, [
181
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
182
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
183
+ [:precursor_neutral_mass, 691.150992], # mine
184
+ ])
185
+
186
+ sh = spec.search_results.first.search_hits.first
187
+ assert_equal_pairs_swapped(sh, [
188
+ # normal attributes
189
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
190
+
191
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
192
+ [:num_missed_cleavages, 1],
193
+ [:is_rejected, 0],
194
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
195
+ [:calc_neutral_pep_mass, 691.82261], # mine
196
+ #[:massdiff, "-0.700000"], # out2summary
197
+ [:massdiff, -0.67161800000008], # mine
198
+
199
+ # search_score
200
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
201
+ ])
202
+ end
203
+
204
+ it 'can generate correct pepxml file' do
205
+
206
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
207
+ string = @pepxml_objs.first.to_pepxml
208
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
209
+ base_name_re = /base_name=".*?files\//o
210
+ date_re = /date=".*?"/
211
+ string.split("\n").each_with_index do |line,i|
212
+ if i > 99 ; break end
213
+ ans, exp =
214
+ if i == 1
215
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
216
+ elsif i == 2
217
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
218
+ elsif i == 6
219
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
220
+ else
221
+ [line, ans_lines[i]]
222
+ end
223
+
224
+ #ans.split('').zip(exp.split('')) do |l,a|
225
+ # if l != a
226
+ # puts line
227
+ # puts ans_lines[i]
228
+ # puts l
229
+ # puts a
230
+ # end
231
+ #end
232
+ if ans != exp
233
+ puts ans
234
+ puts exp
235
+ end
236
+ ans.should == exp
237
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
238
+ end
239
+ end
240
+ end
241
+ end
242
+
243
+
244
+
245
+ describe Sequest::PepXML::Modifications do
246
+ before(:each) do
247
+ tf_params = Tfiles + "/bioworks32.params"
248
+ @params = Sequest::Params.new(tf_params)
249
+ # The params object here is completely unnecessary for this test, except
250
+ # that it sets up the mass table
251
+ @obj = Sequest::PepXML::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
252
+ end
253
+ it 'creates a mod_symbols_hash' do
254
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
255
+ @obj.mod_symbols_hash.should == answ
256
+ ## need more here
257
+ end
258
+
259
+ it 'creates a ModificationInfo object given a special peptide sequence' do
260
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
261
+ @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
262
+ @params.term_diff_search_options = "14.20000 12.33000"
263
+ mod = Sequest::PepXML::Modifications.new(@params, mod_string)
264
+ ## no mods
265
+ peptide = "PEPTIDE"
266
+ mod.modification_info(peptide).should be_nil
267
+ peptide = "]M*EC^S@IDM#M*EMSCM["
268
+ modinfo = mod.modification_info(peptide)
269
+ modinfo.modified_peptide.should == peptide
270
+ modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
271
+ modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
272
+ end
273
+
274
+ end
275
+
276
+ describe Sequest::PepXML::SearchHit::ModificationInfo do
277
+
278
+ before(:each) do
279
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
280
+ Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
281
+ end
282
+ hash = {
283
+ :mod_nterm_mass => 520.2,
284
+ :modified_peptide => "MOD*IFI^E&D",
285
+ :mod_aminoacid_masses => modaaobjs,
286
+ }
287
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
288
+ @obj = Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
289
+ end
290
+
291
+ def _re(st)
292
+ /#{Regexp.escape(st)}/
293
+ end
294
+
295
+ it 'can produce pepxml' do
296
+ answ = @obj.to_pepxml
297
+ answ.should =~ _re('<modification_info')
298
+ answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
299
+ answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
300
+ answ.should =~ _re("<mod_aminoacid_mass")
301
+ answ.should =~ _re(" position=\"3\"")
302
+ answ.should =~ _re(" mass=\"150.3\"")
303
+ answ.should =~ _re(" position=\"6\"")
304
+ answ.should =~ _re(" mass=\"345.2\"")
305
+ answ.should =~ _re("</modification_info>")
306
+ end
307
+ end
308
+
309
+ describe 'bioworks file with modifications transformed into pepxml' do
310
+
311
+ spec_large do
312
+ before(:all) do
313
+ modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
314
+ modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
315
+ @srgfile = modfiles_sequest_dir + 'tmp.srg'
316
+ @out_path = modfiles_sequest_dir + 'pepxml'
317
+ modfiles = %w(020 040).map do |file|
318
+ modfiles_sequest_dir + file + ".srf"
319
+ end
320
+ objs = Sequest::PepXML.set_from_bioworks( SRFGroup.new(modfiles).to_srg(@srgfile), {:ms_data => modfiles_data_dir, :out_path => @out_path, :print => true, :backup_db_path => '/project/marcotte/marcotte/ms/database'} )
321
+ @out_files = %w(020 040).map do |file|
322
+ @out_path + '/' + file + '.xml'
323
+ end
324
+ end
325
+
326
+ after(:all) do
327
+ File.unlink(@srgfile) unless NODELETE
328
+ FileUtils.rm_r(@out_path)
329
+ #@out_files.each do |fn|
330
+ # File.unlink(fn) unless NODELETE
331
+ #end
332
+ end
333
+
334
+ # splits string on ' ' and matches the pieces against each line found by find_line_regexp in
335
+ # lines
336
+ def match_modline_pieces(lines, find_line_regexp, string)
337
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
338
+ lines.each do |line|
339
+ if line =~ find_line_regexp
340
+ pieces.each do |piece|
341
+ line.should =~ piece
342
+ end
343
+ end
344
+ end
345
+ end
346
+
347
+ it 'gets modifications right in real run' do
348
+ @out_files.each do |fn|
349
+ fn.exist_as_a_file?.should be_true
350
+ beginning = IO.read(fn)
351
+ lines = beginning.split("\n")
352
+ [
353
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
354
+
355
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
356
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
357
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
358
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
359
+ ].each do |a,b|
360
+ match_modline_pieces(lines, a, b)
361
+ end
362
+ [
363
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
364
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
365
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
366
+ '</modification_info>',
367
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
368
+ ].each do |line|
369
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
370
+ end
371
+ end
372
+ end
373
+ end
374
+ end
375
+
376
+ =end