ms-ident 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,181 @@
1
+
2
+ require 'spec_helper'
3
+ require 'ms/ident/pepxml/sample_enzyme'
4
+ require 'nokogiri'
5
+
6
# Specs for the two construction styles exercised here: passing an enzyme
# name string and passing an attribute hash. In both cases every key of
# @hash is read back from the object through its same-named reader.
describe 'creating an Ms::Ident::Pepxml::SampleEnzyme' do
  before do
    # Attribute values each spec below expects to read back.
    @hash = {
      :name => 'trypsin',
      :cut => 'KR',
      :no_cut => 'P',
      :sense => 'C',
    }
  end

  it 'can be set by a known enzyme name' do
    se = Ms::Ident::Pepxml::SampleEnzyme.new('trypsin')
    @hash.each do |k,v|
      se.send(k).is v
    end
  end

  it 'can be set manually with a hash' do
    se = Ms::Ident::Pepxml::SampleEnzyme.new(@hash)
    @hash.each do |k,v|
      se.send(k).is v
    end
  end
end
29
+
30
# Specs for XML output of a SampleEnzyme: once as a returned String (to_xml
# with no argument) and once appended to a caller-supplied Nokogiri builder.
describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
  before do
    @sample_enzyme = Ms::Ident::Pepxml::SampleEnzyme.new(:name=>'trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
  end

  it 'generates a valid xml fragment' do
    string = @sample_enzyme.to_xml
    ok string.is_a?(String)
    string.matches(/<sample_enzyme name="trypsin"/)
    string.matches(/<specificity/)
    %w(cut="KR" no_cut="P" sense="C").each do |attribute|
      # \b stops 'cut="KR"' from being satisfied by the tail of a
      # 'no_cut="KR"' attribute ('_' is a word character, so there is no
      # boundary between '_' and 'c').
      string.matches(/\b#{Regexp.escape(attribute)}/)
    end
    # The fragment must not carry the word 'version' anywhere.
    ok !string.include?('version')
  end

  it 'adds to an xml builder object' do
    builder = Nokogiri::XML::Builder.new
    after = @sample_enzyme.to_xml(builder)
    ok after.is_a?(Nokogiri::XML::Builder)
    # The same builder that was passed in is what comes back.
    after.is builder
    ok after.to_xml.is_a?(String)
  end
end
50
+
51
# Pending group: no specs for reading a SampleEnzyme from an XML node exist yet.
# NOTE(review): xdescribe is presumably the disabled variant of describe
# supplied by the test library loaded in spec_helper -- confirm.
xdescribe 'read in from an xml node' do
  # placeholder until written
end
54
+
55
+ ### DOES this kind of functionality belong in this kind of container????
56
+ ### SHOULD it be with ms-enzyme or ms-in_silico ???????
57
+
58
+ =begin
59
+ require 'set'
60
+
61
+ describe 'Ms::Ident::Pepxml::SampleEnzyme digesting sequences' do
62
+ it 'can digest with no missed cleavages' do
63
+ st = "CRGATKKTAGRPMEK"
64
+ SampleEnzyme.tryptic(st).should == %w(CR GATK K TAGRPMEK)
65
+ st = "CATRP"
66
+ SampleEnzyme.tryptic(st).should == %w(CATRP)
67
+ st = "RCATRP"
68
+ SampleEnzyme.tryptic(st).should == %w(R CATRP)
69
+ st = ""
70
+ SampleEnzyme.tryptic(st).should == []
71
+ st = "R"
72
+ SampleEnzyme.tryptic(st).should == %w(R)
73
+ end
74
+
75
+ it 'can digest with missed cleavages' do
76
+ st = "CRGATKKTAGRPMEKLLLERTKY"
77
+ zero = %w(CR GATK K TAGRPMEK LLLER TK Y)
78
+ SampleEnzyme.tryptic(st,0).to_set.should == zero.to_set
79
+ one = %w(CRGATK GATKK KTAGRPMEK TAGRPMEKLLLER LLLERTK TKY)
80
+ SampleEnzyme.tryptic(st,1).to_set.should == (zero+one).to_set
81
+ two = %w(CRGATKK GATKKTAGRPMEK KTAGRPMEKLLLER TAGRPMEKLLLERTK LLLERTKY)
82
+ all = zero + one + two
83
+ SampleEnzyme.tryptic(st,2).to_set.should == all.to_set
84
+ end
85
+
86
+ it 'contains duplicates IF there are duplicate tryptic sequences' do
87
+ st = "AAAAKCCCCKDDDDKCCCCK"
88
+ peps = SampleEnzyme.new('trypsin').digest(st, 2)
89
+ peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
90
+ end
91
+
92
+ end
93
+
94
+ describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
95
+
96
+ before(:each) do
97
+ @full_KRP = SampleEnzyme.new do |se|
98
+ se.name = 'trypsin'
99
+ se.cut = 'KR'
100
+ se.no_cut = 'P'
101
+ se.sense = 'C'
102
+ end
103
+ @just_KR = SampleEnzyme.new do |se|
104
+ se.name = 'trypsin'
105
+ se.cut = 'KR'
106
+ se.no_cut = ''
107
+ se.sense = 'C'
108
+ end
109
+ end
110
+
111
+ it 'calculates the number of tolerant termini' do
112
+ exp = [{
113
+ # full KR/P
114
+ 'K.EPTIDR.E' => 2,
115
+ 'K.PEPTIDR.E' => 1,
116
+ 'F.EEPTIDR.E' => 1,
117
+ 'F.PEPTIDW.R' => 0,
118
+ },
119
+ {
120
+ # just KR
121
+ 'K.EPTIDR.E' => 2,
122
+ 'K.PEPTIDR.E' => 2,
123
+ 'F.EEPTIDR.E' => 1,
124
+ 'F.PEPTIDW.R' => 0,
125
+ }
126
+ ]
127
+ scall = Sequest::PepXML::SearchHit
128
+ sample_enzyme_ar = [@full_KRP, @just_KR]
129
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
130
+ hash.each do |seq, val|
131
+ sample_enzyme.num_tol_term(seq).should == val
132
+ end
133
+ end
134
+ end
135
+
136
+ it 'calculates number of missed cleavages' do
137
+ exp = [{
138
+ "EPTIDR" => 0,
139
+ "PEPTIDR" => 0,
140
+ "EEPTIDR" => 0,
141
+ "PEPTIDW" => 0,
142
+ "PERPTIDW" => 0,
143
+ "PEPKPTIDW" => 0,
144
+ "PEPKTIDW" => 1,
145
+ "RTTIDR" => 1,
146
+ "RTTIKK" => 2,
147
+ "PKEPRTIDW" => 2,
148
+ "PKEPRTIDKP" => 2,
149
+ "PKEPRAALKPEERPTIDKW" => 3,
150
+ },
151
+ {
152
+ "EPTIDR" => 0,
153
+ "PEPTIDR" => 0,
154
+ "EEPTIDR" => 0,
155
+ "PEPTIDW" => 0,
156
+ "PERPTIDW" => 1,
157
+ "PEPKPTIDW" => 1,
158
+ "PEPKTIDW" => 1,
159
+ "RTTIDR" => 1,
160
+ "RTTIKK" => 2,
161
+ "PKEPRTIDW" => 2,
162
+ "PKEPRTIDKP" => 3,
163
+ "PKEPRAALKPEERPTIDKW" => 5,
164
+ }
165
+ ]
166
+
167
+ sample_enzyme_ar = [@full_KRP, @just_KR]
168
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
169
+ hash.each do |aaseq, val|
170
+ #first, middle, last = SpecID::Pep.split_sequence(seq)
171
+ # note that we are only using the middle section!
172
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
173
+ end
174
+ end
175
+ end
176
+
177
+ end
178
+ =end
179
+
180
+
181
+
@@ -0,0 +1,436 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/mass'
4
+ require 'ms/mass/aa'
5
+ require 'ms/ident/pepxml'
6
+ require 'ms/ident/pepxml/modifications'
7
+ require 'ms/ident/pepxml/spectrum_query'
8
+ require 'ms/ident/pepxml/search_result'
9
+ require 'ms/ident/pepxml/search_hit'
10
+ require 'ms/ident/pepxml/search_hit/modification_info'
11
+
12
# Builds a complete Pepxml object through nested blocks, each block yielding
# the child objects of the level above, then checks that the serialized XML
# contains the <msms_pipeline_analysis> root element.
describe "creating an Ms::Ident::Pepxml" do
  extend Ms::Ident

  it "can be created in a nested fashion reflecting internal structure" do
    pepxml = Pepxml.new do |msms_pipeline_analysis|
      msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
        # prep the sample enzyme and search_summary
        msms_run_summary.merge!(
          :base_name => '/home/jtprince/dev/mspire/020',
          :ms_manufacturer => 'Thermo',
          :ms_model => 'LTQ Orbitrap',
          :ms_ionization => 'ESI',
          :ms_mass_analyzer => 'Ion Trap',
          :ms_detector => 'UNKNOWN'
        ) do |sample_enzyme, search_summary, spectrum_queries|
          sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
          search_summary.merge!(
            :base_name=>'/path/to/file/020',
            :search_engine => 'SEQUEST',
            :precursor_mass_type =>'monoisotopic',
            :fragment_mass_type => 'average'
          ) do |search_database, enzymatic_search_constraint, modifications, parameters|
            search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
            enzymatic_search_constraint.merge!(
              :enzyme => 'Trypsin',
              :max_num_internal_cleavages => 2,
              :min_number_termini => 2
            )
            modifications << Pepxml::AminoacidModification.new(
              :aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
              :variable => 'Y', :symbol => '*')
            # invented, for example, a protein terminating mod
            modifications << Pepxml::TerminalModification.new(
              :terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333,
              :variable => 'Y', :symbol => '[', :protein_terminus => 'c',
              :description => 'leave protein_terminus off if not protein mod'
            )
            modifications << Pepxml::TerminalModification.new(
              :terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322,
              :variable => 'N', :symbol => ']', :description => 'example: c term mod'
            )
            parameters.merge!(
              :fragment_ion_tolerance => 1.0000,
              :digest_mass_range => '600.0 3500.0',
              :enzyme_info => 'Trypsin(KR/P) 1 1 KR P' # etc....
            )
          end
          spectrum_query1 = Pepxml::SpectrumQuery.new(
            :spectrum => '020.3.3.1', :start_scan => 3, :end_scan => 3,
            :precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
          ) do |search_results|
            search_result1 = Pepxml::SearchResult.new do |search_hits|
              # [position, mass] pairs turned into ModAminoacidMass objects
              modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
                Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
              end
              # order(modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
              # or can be set by hash
              mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
              search_hit1 = Pepxml::SearchHit.new(
                :hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
                :protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
                :tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
                :num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
                :modification_info => mod_info) do |search_scores|
                search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
                                     :spscore => 29.85, :sprank => 1)
              end
              search_hits << search_hit1
            end
            search_results << search_result1
          end
          spectrum_queries << spectrum_query1
        end
      end
    end
    # Parentheses avoid Ruby's "ambiguous first argument" warning for a bare
    # regex literal; the XML is no longer dumped to stdout on every run.
    pepxml.to_xml.matches(/<msms_pipeline_analysis /)
  end
end
91
+
92
+ =begin
93
+ # splits string on ' 'and matches the line found by find_line_regexp in
94
+ # lines
95
+ def match_modline_pieces(lines, find_line_regexp, string)
96
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
97
+ lines.each do |line|
98
+ if line =~ find_line_regexp
99
+ pieces.each do |piece|
100
+ line.should =~ piece
101
+ end
102
+ end
103
+ end
104
+ end
105
+
106
+
107
+ it 'gets modifications right in real run' do
108
+ @out_files.each do |fn|
109
+ fn.exist_as_a_file?.should be_true
110
+ beginning = IO.read(fn)
111
+ lines = beginning.split("\n")
112
+ [
113
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
114
+
115
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
116
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
117
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
118
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
119
+ ].each do |a,b|
120
+ match_modline_pieces(lines, a, b)
121
+ end
122
+ [
123
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
124
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
125
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
126
+ '</modification_info>',
127
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
128
+ ].each do |line|
129
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
130
+ end
131
+ end
132
+ end
133
+ end
134
+ end
135
+
136
+
137
+
138
+ =begin
139
+ describe "Ms::Ident::Pepxml created from small bioworks.xml" do
140
+
141
+ spec_large do
142
+ before(:all) do
143
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
144
+
145
+ tf_params = Tfiles + "/bioworks32.params"
146
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
147
+ out_path = Tfiles
148
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
149
+ end
150
+
151
+ it 'gets some spectrum queries' do
152
+ @pepxml_objs.each do |obj|
153
+ (obj.spectrum_queries.size > 2).should be_true
154
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
155
+ end
156
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
157
+ end
158
+ end
159
+ end
160
+
161
+
162
+
163
+ describe Sequest::Pepxml, " created from large bioworks.xml" do
164
+ # assert_equal_by_pairs (really any old array)
165
+ def assert_equal_pairs(obj, arrs)
166
+ arrs.each do |arr|
167
+ #if obj.send(arr[1]) != arr[0]
168
+ # puts "HELLO"
169
+ # puts "OBJ answer"
170
+ # p obj.send(arr[1])
171
+ # puts "ar0"
172
+ # p arr[0]
173
+ # puts "ar1"
174
+ # p arr[1]
175
+ #end
176
+ if arr[0].is_a? Float
177
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
178
+ else
179
+ obj.send(arr[1]).should == arr[0]
180
+ end
181
+ end
182
+ end
183
+
184
+ # swap the first two elements of each pair first
185
+ def assert_equal_pairs_swapped(obj, arrs)
186
+ arrs.each do |arr|
187
+ arr[0], arr[1] = arr[1], arr[0]
188
+ end
189
+ assert_equal_pairs(obj, arrs)
190
+ end
191
+
192
+ spec_large do
193
+ before(:all) do
194
+ st = Time.new
195
+ params = Tfiles + "/opd1/sequest.3.2.params"
196
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
197
+ mzxml_path = Tfiles_l + "/opd1"
198
+ out_path = Tfiles
199
+ @pepxml_version = 18
200
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
201
+ puts "- takes #{Time.new - st} secs"
202
+ end
203
+
204
+ it 'extracts MSMSPipelineAnalysis' do
205
+ ######## HMMMMM...
206
+ Sequest::Pepxml.pepxml_version.should == @pepxml_version
207
+
208
+ # MSMSPipelineAnalysis
209
+ po = @pepxml_objs.first
210
+ msms_pipeline = po.msms_pipeline_analysis
211
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
212
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
213
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
214
+ msms_pipeline.summary_xml.should == '000.xml'
215
+ end
216
+
217
+ it 'extracts MSmSRunSummary' do
218
+ # MSMSRunSummary
219
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
220
+ rs.base_name.should =~ /\/000/
221
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
222
+ end
223
+
224
+ it 'extracts SampleEnzyme' do
225
+ # SampleEnzyme
226
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
227
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
228
+ end
229
+
230
+ it 'extracts SearchSummary' do
231
+ # SearchSummary
232
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
233
+ ss.is_a?(Sequest::Pepxml::SearchSummary).should be_true
234
+ ss.base_name.should =~ /\/000/
235
+ ss.peptide_mass_tol.should =~ /1\.500/
236
+ assert_equal_pairs_swapped(ss, [ # normal attributes
237
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
238
+
239
+ # enzymatic_search_constraint
240
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
241
+
242
+ # parameters
243
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
244
+ ])
245
+
246
+ end
247
+ it 'extracts SearchDatabase' do
248
+ # SearchDatabase
249
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
250
+ sd.is_a?(Sequest::Pepxml::SearchDatabase).should be_true
251
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
252
+ end
253
+
254
+ it 'returns SpectrumQueries' do
255
+ # SpectrumQueries
256
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
257
+ spec = sq.first
258
+ assert_equal_pairs_swapped(spec, [
259
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
260
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
261
+ [:precursor_neutral_mass, 1074.666926], # mine
262
+ [:assumed_charge, 1], [:index, "1"],
263
+ ])
264
+ sh = spec.search_results.first.search_hits.first
265
+ assert_equal_pairs_swapped(sh, [
266
+ # normal attributes
267
+ [:hit_rank, 1],
268
+ [:peptide, "SIYFRNFK"],
269
+ [:peptide_prev_aa, "R"],
270
+ [:peptide_next_aa, "G"],
271
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
272
+ [:num_tot_proteins, 1],
273
+ [:num_matched_ions, 4],
274
+ [:tot_num_ions, 14],
275
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
276
+ [:calc_neutral_pep_mass, 1074.23261], # mine
277
+ #[:massdiff, "+0.400000"], # out2summary
278
+ [:massdiff, 0.434316000000081], # mine
279
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
280
+
281
+ # search_score
282
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
283
+ ])
284
+
285
+ spec = sq[1]
286
+ assert_equal_pairs_swapped(spec, [
287
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
288
+ [:precursor_neutral_mass, 663.206111], # mine
289
+ [:assumed_charge, 1], [:index, "2"],
290
+ ])
291
+
292
+ sh = spec.search_results.first.search_hits.first
293
+ assert_equal_pairs_swapped(sh, [
294
+ # normal attributes
295
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
296
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
297
+ #[:massdiff, "-0.600000"], # out2summary
298
+ [:massdiff, -0.556499000000031], # mine
299
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
300
+ [:calc_neutral_pep_mass, 663.76261], # mine
301
+
302
+ # search_score
303
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
304
+ ])
305
+
306
+ spec = sq[9]
307
+ assert_equal_pairs_swapped(spec, [
308
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
309
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
310
+ [:precursor_neutral_mass, 691.150992], # mine
311
+ ])
312
+
313
+ sh = spec.search_results.first.search_hits.first
314
+ assert_equal_pairs_swapped(sh, [
315
+ # normal attributes
316
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
317
+
318
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
319
+ [:num_missed_cleavages, 1],
320
+ [:is_rejected, 0],
321
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
322
+ [:calc_neutral_pep_mass, 691.82261], # mine
323
+ #[:massdiff, "-0.700000"], # out2summary
324
+ [:massdiff, -0.67161800000008], # mine
325
+
326
+ # search_score
327
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
328
+ ])
329
+ end
330
+
331
+ it 'can generate correct pepxml file' do
332
+
333
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
334
+ string = @pepxml_objs.first.to_pepxml
335
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
336
+ base_name_re = /base_name=".*?files\//o
337
+ date_re = /date=".*?"/
338
+ string.split("\n").each_with_index do |line,i|
339
+ if i > 99 ; break end
340
+ ans, exp =
341
+ if i == 1
342
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
343
+ elsif i == 2
344
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
345
+ elsif i == 6
346
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
347
+ else
348
+ [line, ans_lines[i]]
349
+ end
350
+
351
+ #ans.split('').zip(exp.split('')) do |l,a|
352
+ # if l != a
353
+ # puts line
354
+ # puts ans_lines[i]
355
+ # puts l
356
+ # puts a
357
+ # end
358
+ #end
359
+ if ans != exp
360
+ puts ans
361
+ puts exp
362
+ end
363
+ ans.should == exp
364
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
365
+ end
366
+ end
367
+ end
368
+ end
369
+
370
+
371
+
372
+ describe Sequest::Pepxml::Modifications do
373
+ before(:each) do
374
+ tf_params = Tfiles + "/bioworks32.params"
375
+ @params = Sequest::Params.new(tf_params)
376
+ # The params object here is completely unnecessary for this test, except
377
+ # that it sets up the mass table
378
+ @obj = Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
379
+ end
380
+ it 'creates a mod_symbols_hash' do
381
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
382
+ @obj.mod_symbols_hash.should == answ
383
+ ## need more here
384
+ end
385
+
386
+ it 'creates a ModificationInfo object given a special peptide sequence' do
387
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
388
+ @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
389
+ @params.term_diff_search_options = "14.20000 12.33000"
390
+ mod = Sequest::Pepxml::Modifications.new(@params, mod_string)
391
+ ## no mods
392
+ peptide = "PEPTIDE"
393
+ mod.modification_info(peptide).should be_nil
394
+ peptide = "]M*EC^S@IDM#M*EMSCM["
395
+ modinfo = mod.modification_info(peptide)
396
+ modinfo.modified_peptide.should == peptide
397
+ modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
398
+ modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
399
+ end
400
+
401
+ end
402
+
403
+ describe Sequest::Pepxml::SearchHit::ModificationInfo do
404
+
405
+ before(:each) do
406
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
407
+ Sequest::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
408
+ end
409
+ hash = {
410
+ :mod_nterm_mass => 520.2,
411
+ :modified_peptide => "MOD*IFI^E&D",
412
+ :mod_aminoacid_masses => modaaobjs,
413
+ }
414
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
415
+ @obj = Sequest::Pepxml::SearchHit::ModificationInfo.new(hash)
416
+ end
417
+
418
+ def _re(st)
419
+ /#{Regexp.escape(st)}/
420
+ end
421
+
422
+ it 'can produce pepxml' do
423
+ answ = @obj.to_pepxml
424
+ answ.should =~ _re('<modification_info')
425
+ answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
426
+ answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
427
+ answ.should =~ _re("<mod_aminoacid_mass")
428
+ answ.should =~ _re(" position=\"3\"")
429
+ answ.should =~ _re(" mass=\"150.3\"")
430
+ answ.should =~ _re(" position=\"6\"")
431
+ answ.should =~ _re(" mass=\"345.2\"")
432
+ answ.should =~ _re("</modification_info>")
433
+ end
434
+ end
435
+
436
+ =end
@@ -0,0 +1,40 @@
1
require 'rubygems'
require 'bundler'

# Large-file specs are opt-in through the SPEC_LARGE environment variable.
# An unset or blank value, or an explicit "0" / "false" / "no" / "off"
# (any case), leaves them disabled; any other value enables them. Reading the
# raw string for truthiness would treat SPEC_LARGE=false or SPEC_LARGE= as on.
spec_large_setting = ENV['SPEC_LARGE'].to_s.strip.downcase
$spec_large = !['', '0', 'false', 'no', 'off'].include?(spec_large_setting)

# Bundler group that is set up alongside :default.
development = $spec_large ? :development_large : :development

begin
  Bundler.setup(:default, development)
rescue Bundler::BundlerError => e
  # Report the problem and exit with the status code carried by the error.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'spec/more'

# The external test data is only loaded when large specs are enabled.
if $spec_large
  require 'ms/testdata'
  SEQUEST_DIR = Ms::TESTDATA + '/sequest'
end

# Make both this spec directory and the sibling lib directory requirable.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

Bacon.summary_on_exit
28
+
29
+
30
# Runs a group of specs only when large-file testing is switched on.
#
# When the global $spec_large is truthy the given block is called and its
# result returned. Otherwise the block is ignored and a single spec named
# 'SKIPPING (not testing large files)' with an empty body is declared via
# +it+, so this must be called where +it+ is available.
#
# block - the spec definitions to run when large-file testing is enabled.
def spec_large(&block)
  return block.call if $spec_large

  # Requires SPEC_LARGE=true and tfiles_large dir for testing large test files
  it 'SKIPPING (not testing large files)' do
  end
end
39
+
40
# Path of the 'tfiles' directory that sits next to this file. Expanded to an
# absolute path so it stays valid if the working directory changes.
TESTFILES = File.expand_path('tfiles', File.dirname(__FILE__))