ms-ident 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,181 @@
1
+
2
+ require 'spec_helper'
3
+ require 'ms/ident/pepxml/sample_enzyme'
4
+ require 'nokogiri'
5
+
6
# Construction specs: a SampleEnzyme built from an enzyme name and one built
# from an attribute hash are both checked against the same expected values.
describe 'creating an Ms::Ident::Pepxml::SampleEnzyme' do
  before do
    # Attribute name => value pairs every constructed enzyme is compared to.
    @hash = { :name => 'trypsin', :cut => 'KR', :no_cut => 'P', :sense => 'C' }
  end

  it 'can be set by a known enzyme name' do
    enzyme = Ms::Ident::Pepxml::SampleEnzyme.new('trypsin')
    @hash.each { |attribute, expected| enzyme.send(attribute).is(expected) }
  end

  it 'can be set manually with a hash' do
    enzyme = Ms::Ident::Pepxml::SampleEnzyme.new(@hash)
    @hash.each { |attribute, expected| enzyme.send(attribute).is(expected) }
  end
end
29
+
30
# Serialization specs for a trypsin SampleEnzyme: #to_xml called with no
# argument and #to_xml called with a Nokogiri builder.
describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
  before do
    @sample_enzyme = Ms::Ident::Pepxml::SampleEnzyme.new(
      :name => 'trypsin', :cut => 'KR', :no_cut => 'P', :sense => 'C'
    )
  end

  it 'generates a valid xml fragment' do
    xml = @sample_enzyme.to_xml
    ok xml.is_a?(String)
    # Element names first, then each specificity attribute.
    [
      /<sample_enzyme name="trypsin"/,
      /<specificity/,
      /cut="KR"/,
      /no_cut="P"/,
      /sense="C"/
    ].each { |pattern| xml.matches(pattern) }
    ok !xml.include?('version')
  end

  it 'adds to an xml builder object' do
    builder = Nokogiri::XML::Builder.new
    returned = @sample_enzyme.to_xml(builder)
    ok returned.is_a?(Nokogiri::XML::Builder)
    # The very builder that was passed in must come back.
    returned.is builder
    ok returned.to_xml.is_a?(String)
  end
end
50
+
51
# Placeholder group: the specs for reading from an xml node are not written yet.
xdescribe('read in from an xml node') do
end
54
+
55
+ ### DOES this kind of functionality belong in this kind of container????
56
+ ### SHOULD it be with ms-enzyme or ms-in_silico ???????
57
+
58
+ =begin
59
+ require 'set'
60
+
61
+ describe 'Ms::Ident::Pepxml::SampleEnzyme digesting sequences' do
62
+ it 'can digest with no missed cleavages' do
63
+ st = "CRGATKKTAGRPMEK"
64
+ SampleEnzyme.tryptic(st).should == %w(CR GATK K TAGRPMEK)
65
+ st = "CATRP"
66
+ SampleEnzyme.tryptic(st).should == %w(CATRP)
67
+ st = "RCATRP"
68
+ SampleEnzyme.tryptic(st).should == %w(R CATRP)
69
+ st = ""
70
+ SampleEnzyme.tryptic(st).should == []
71
+ st = "R"
72
+ SampleEnzyme.tryptic(st).should == %w(R)
73
+ end
74
+
75
+ it 'can digest with missed cleavages' do
76
+ st = "CRGATKKTAGRPMEKLLLERTKY"
77
+ zero = %w(CR GATK K TAGRPMEK LLLER TK Y)
78
+ SampleEnzyme.tryptic(st,0).to_set.should == zero.to_set
79
+ one = %w(CRGATK GATKK KTAGRPMEK TAGRPMEKLLLER LLLERTK TKY)
80
+ SampleEnzyme.tryptic(st,1).to_set.should == (zero+one).to_set
81
+ two = %w(CRGATKK GATKKTAGRPMEK KTAGRPMEKLLLER TAGRPMEKLLLERTK LLLERTKY)
82
+ all = zero + one + two
83
+ SampleEnzyme.tryptic(st,2).to_set.should == all.to_set
84
+ end
85
+
86
+ it 'contains duplicates IF there are duplicate tryptic sequences' do
87
+ st = "AAAAKCCCCKDDDDKCCCCK"
88
+ peps = SampleEnzyme.new('trypsin').digest(st, 2)
89
+ peps.select {|aaseq| aaseq == 'CCCCK'}.size.should == 2
90
+ end
91
+
92
+ end
93
+
94
+ describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
95
+
96
+ before(:each) do
97
+ @full_KRP = SampleEnzyme.new do |se|
98
+ se.name = 'trypsin'
99
+ se.cut = 'KR'
100
+ se.no_cut = 'P'
101
+ se.sense = 'C'
102
+ end
103
+ @just_KR = SampleEnzyme.new do |se|
104
+ se.name = 'trypsin'
105
+ se.cut = 'KR'
106
+ se.no_cut = ''
107
+ se.sense = 'C'
108
+ end
109
+ end
110
+
111
+ it 'calculates the number of tolerant termini' do
112
+ exp = [{
113
+ # full KR/P
114
+ 'K.EPTIDR.E' => 2,
115
+ 'K.PEPTIDR.E' => 1,
116
+ 'F.EEPTIDR.E' => 1,
117
+ 'F.PEPTIDW.R' => 0,
118
+ },
119
+ {
120
+ # just KR
121
+ 'K.EPTIDR.E' => 2,
122
+ 'K.PEPTIDR.E' => 2,
123
+ 'F.EEPTIDR.E' => 1,
124
+ 'F.PEPTIDW.R' => 0,
125
+ }
126
+ ]
127
+ scall = Sequest::PepXML::SearchHit
128
+ sample_enzyme_ar = [@full_KRP, @just_KR]
129
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
130
+ hash.each do |seq, val|
131
+ sample_enzyme.num_tol_term(seq).should == val
132
+ end
133
+ end
134
+ end
135
+
136
+ it 'calculates number of missed cleavages' do
137
+ exp = [{
138
+ "EPTIDR" => 0,
139
+ "PEPTIDR" => 0,
140
+ "EEPTIDR" => 0,
141
+ "PEPTIDW" => 0,
142
+ "PERPTIDW" => 0,
143
+ "PEPKPTIDW" => 0,
144
+ "PEPKTIDW" => 1,
145
+ "RTTIDR" => 1,
146
+ "RTTIKK" => 2,
147
+ "PKEPRTIDW" => 2,
148
+ "PKEPRTIDKP" => 2,
149
+ "PKEPRAALKPEERPTIDKW" => 3,
150
+ },
151
+ {
152
+ "EPTIDR" => 0,
153
+ "PEPTIDR" => 0,
154
+ "EEPTIDR" => 0,
155
+ "PEPTIDW" => 0,
156
+ "PERPTIDW" => 1,
157
+ "PEPKPTIDW" => 1,
158
+ "PEPKTIDW" => 1,
159
+ "RTTIDR" => 1,
160
+ "RTTIKK" => 2,
161
+ "PKEPRTIDW" => 2,
162
+ "PKEPRTIDKP" => 3,
163
+ "PKEPRAALKPEERPTIDKW" => 5,
164
+ }
165
+ ]
166
+
167
+ sample_enzyme_ar = [@full_KRP, @just_KR]
168
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
169
+ hash.each do |aaseq, val|
170
+ #first, middle, last = SpecID::Pep.split_sequence(seq)
171
+ # note that we are only using the middle section!
172
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
173
+ end
174
+ end
175
+ end
176
+
177
+ end
178
+ =end
179
+
180
+
181
+
@@ -0,0 +1,436 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/mass'
4
+ require 'ms/mass/aa'
5
+ require 'ms/ident/pepxml'
6
+ require 'ms/ident/pepxml/modifications'
7
+ require 'ms/ident/pepxml/spectrum_query'
8
+ require 'ms/ident/pepxml/search_result'
9
+ require 'ms/ident/pepxml/search_hit'
10
+ require 'ms/ident/pepxml/search_hit/modification_info'
11
+
12
# Builds a complete pepXML object through the nested-block API (each block
# receives the child objects of the element being filled in) and checks that
# the serialized output contains the root element.
describe "creating an Ms::Ident::Pepxml" do
  extend Ms::Ident

  it "can be creating in a nested fashion reflecting internal structure" do
    pepxml = Pepxml.new do |msms_pipeline_analysis|
      msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
        # prep the sample enzyme and search_summary
        msms_run_summary.merge!(
          :base_name => '/home/jtprince/dev/mspire/020',
          :ms_manufacturer => 'Thermo',
          :ms_model => 'LTQ Orbitrap',
          :ms_ionization => 'ESI',
          :ms_mass_analyzer => 'Ion Trap',
          :ms_detector => 'UNKNOWN'
        ) do |sample_enzyme, search_summary, spectrum_queries|
          sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
          search_summary.merge!(
            :base_name=>'/path/to/file/020',
            :search_engine => 'SEQUEST',
            :precursor_mass_type =>'monoisotopic',
            :fragment_mass_type => 'average'
          ) do |search_database, enzymatic_search_constraint, modifications, parameters|
            search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
            enzymatic_search_constraint.merge!(
              :enzyme => 'Trypsin',
              :max_num_internal_cleavages => 2,
              :min_number_termini => 2
            )
            modifications << Pepxml::AminoacidModification.new(
              :aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
              :variable => 'Y', :symbol => '*')
            # invented, for example, a protein terminating mod
            modifications << Pepxml::TerminalModification.new(
              :terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333,
              :variable => 'Y', :symbol => '[', :protein_terminus => 'c',
              :description => 'leave protein_terminus off if not protein mod'
            )
            modifications << Pepxml::TerminalModification.new(
              :terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322,
              :variable => 'N', :symbol => ']', :description => 'example: c term mod'
            )
            parameters.merge!(
              :fragment_ion_tolerance => 1.0000,
              :digest_mass_range => '600.0 3500.0',
              :enzyme_info => 'Trypsin(KR/P) 1 1 KR P' # etc....
            )
          end
          spectrum_query1 = Pepxml::SpectrumQuery.new(
            :spectrum => '020.3.3.1', :start_scan => 3, :end_scan => 3,
            :precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
          ) do |search_results|
            search_result1 = Pepxml::SearchResult.new do |search_hits|
              modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
                Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
              end
              # order(modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
              # or can be set by hash
              mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
              search_hit1 = Pepxml::SearchHit.new(
                :hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
                :protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
                :tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
                :num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
                :modification_info => mod_info) do |search_scores|
                search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
                                     :spscore => 29.85, :sprank => 1)
              end
              search_hits << search_hit1
            end
            search_results << search_result1
          end
          spectrum_queries << spectrum_query1
        end
      end
    end
    # Serialize once; the previous debug dump of the whole document to stdout
    # is gone so spec output stays readable.
    xml = pepxml.to_xml
    # Parentheses keep the regexp literal from being read as a division
    # (Ruby's "ambiguous first argument" warning).
    xml.matches(/<msms_pipeline_analysis /)
  end
end
91
+
92
+ =begin
93
+ # splits string on ' ' and matches the line found by find_line_regexp in
94
+ # lines
95
+ def match_modline_pieces(lines, find_line_regexp, string)
96
+ pieces = string.split(' ').map {|v| /#{Regexp.escape(v)}/ }
97
+ lines.each do |line|
98
+ if line =~ find_line_regexp
99
+ pieces.each do |piece|
100
+ line.should =~ piece
101
+ end
102
+ end
103
+ end
104
+ end
105
+
106
+
107
+ it 'gets modifications right in real run' do
108
+ @out_files.each do |fn|
109
+ fn.exist_as_a_file?.should be_true
110
+ beginning = IO.read(fn)
111
+ lines = beginning.split("\n")
112
+ [
113
+ [/aminoacid="M"/, '<aminoacid_modification symbol="*" massdiff="+15.9994" aminoacid="M" variable="Y" binary="N" mass="147.192"'],
114
+
115
+ [/aminoacid="S"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="S" variable="Y" binary="N" mass="167.0581"'],
116
+ [/aminoacid="T"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="T" variable="Y" binary="N" mass="181.085"'],
117
+ [/aminoacid="Y"/, '<aminoacid_modification symbol="#" massdiff="+79.9799" aminoacid="Y" variable="Y" binary="N" mass="243.1559"'],
118
+ [/parameter name="diff_search_options"/, '<parameter name="diff_search_options" value="15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y"/>'],
119
+ ].each do |a,b|
120
+ match_modline_pieces(lines, a, b)
121
+ end
122
+ [
123
+ '<modification_info modified_peptide="Y#RLGGS#T#K">',
124
+ '<mod_aminoacid_mass position="1" mass="243.1559"/>',
125
+ '<mod_aminoacid_mass position="7" mass="167.0581"/>',
126
+ '</modification_info>',
127
+ '<mod_aminoacid_mass position="9" mass="181.085"/>'
128
+ ].each do |line|
129
+ beginning.should =~ /#{Regexp.escape(line)}/ # "a modification info for a peptide")
130
+ end
131
+ end
132
+ end
133
+ end
134
+ end
135
+
136
+
137
+
138
+ =begin
139
+ describe "Ms::Ident::Pepxml created from small bioworks.xml" do
140
+
141
+ spec_large do
142
+ before(:all) do
143
+ tf_mzxml_path = Tfiles_l + "/yeast_gly_mzXML"
144
+
145
+ tf_params = Tfiles + "/bioworks32.params"
146
+ tf_bioworks_xml = Tfiles + "/bioworks_small.xml"
147
+ out_path = Tfiles
148
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks(tf_bioworks_xml, :params => tf_params, :ms_data => tf_mzxml_path, :out_path => out_path)
149
+ end
150
+
151
+ it 'gets some spectrum queries' do
152
+ @pepxml_objs.each do |obj|
153
+ (obj.spectrum_queries.size > 2).should be_true
154
+ (obj.spectrum_queries.first.search_results.first.search_hits.size > 0).should be_true
155
+ end
156
+ #@pepxml_objs.each do |pep| puts pep.to_pepxml end
157
+ end
158
+ end
159
+ end
160
+
161
+
162
+
163
+ describe Sequest::Pepxml, " created from large bioworks.xml" do
164
+ # assert_equal_by_pairs (really any old array)
165
+ def assert_equal_pairs(obj, arrs)
166
+ arrs.each do |arr|
167
+ #if obj.send(arr[1]) != arr[0]
168
+ # puts "HELLO"
169
+ # puts "OBJ answer"
170
+ # p obj.send(arr[1])
171
+ # puts "ar0"
172
+ # p arr[0]
173
+ # puts "ar1"
174
+ # p arr[1]
175
+ #end
176
+ if arr[0].is_a? Float
177
+ obj.send(arr[1]).should be_close(arr[0], 0.0000000001)
178
+ else
179
+ obj.send(arr[1]).should == arr[0]
180
+ end
181
+ end
182
+ end
183
+
184
+ # swap the first two elements of each pair first
185
+ def assert_equal_pairs_swapped(obj, arrs)
186
+ arrs.each do |arr|
187
+ arr[0], arr[1] = arr[1], arr[0]
188
+ end
189
+ assert_equal_pairs(obj, arrs)
190
+ end
191
+
192
+ spec_large do
193
+ before(:all) do
194
+ st = Time.new
195
+ params = Tfiles + "/opd1/sequest.3.2.params"
196
+ bioworks_xml = Tfiles_l + "/opd1/bioworks.000.oldparams.xml"
197
+ mzxml_path = Tfiles_l + "/opd1"
198
+ out_path = Tfiles
199
+ @pepxml_version = 18
200
+ @pepxml_objs = Sequest::Pepxml.set_from_bioworks_xml(bioworks_xml, params, {:ms_data => mzxml_path, :out_path => out_path, :pepxml_version => @pepxml_version})
201
+ puts "- takes #{Time.new - st} secs"
202
+ end
203
+
204
+ it 'extracts MSMSPipelineAnalysis' do
205
+ ######## HMMMMM...
206
+ Sequest::Pepxml.pepxml_version.should == @pepxml_version
207
+
208
+ # MSMSPipelineAnalysis
209
+ po = @pepxml_objs.first
210
+ msms_pipeline = po.msms_pipeline_analysis
211
+ msms_pipeline.xmlns.should == 'http://regis-web.systemsbiology.net/pepXML'
212
+ msms_pipeline.xmlns_xsi.should == 'http://www.w3.org/2001/XMLSchema-instance'
213
+ msms_pipeline.xsi_schema_location.should == 'http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd'
214
+ msms_pipeline.summary_xml.should == '000.xml'
215
+ end
216
+
217
+ it 'extracts MSmSRunSummary' do
218
+ # MSMSRunSummary
219
+ rs = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary
220
+ rs.base_name.should =~ /\/000/
221
+ assert_equal_pairs(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
222
+ end
223
+
224
+ it 'extracts SampleEnzyme' do
225
+ # SampleEnzyme
226
+ se = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.sample_enzyme
227
+ assert_equal_pairs(se, [ ['Trypsin', :name], ['KR', :cut], [nil, :no_cut], ['C', :sense], ])
228
+ end
229
+
230
+ it 'extracts SearchSummary' do
231
+ # SearchSummary
232
+ ss = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary
233
+ ss.is_a?(Sequest::Pepxml::SearchSummary).should be_true
234
+ ss.base_name.should =~ /\/000/
235
+ ss.peptide_mass_tol.should =~ /1\.500/
236
+ assert_equal_pairs_swapped(ss, [ # normal attributes
237
+ [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
238
+
239
+ # enzymatic_search_constraint
240
+ [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
241
+
242
+ # parameters
243
+ [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
244
+ ])
245
+
246
+ end
247
+ it 'extracts SearchDatabase' do
248
+ # SearchDatabase
249
+ sd = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.search_summary.search_database
250
+ sd.is_a?(Sequest::Pepxml::SearchDatabase).should be_true
251
+ assert_equal_pairs_swapped(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
252
+ end
253
+
254
+ it 'returns SpectrumQueries' do
255
+ # SpectrumQueries
256
+ sq = @pepxml_objs.first.msms_pipeline_analysis.msms_run_summary.spectrum_queries
257
+ spec = sq.first
258
+ assert_equal_pairs_swapped(spec, [
259
+ [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
260
+ #[:precursor_neutral_mass, "1074.5920"], # out2summary
261
+ [:precursor_neutral_mass, 1074.666926], # mine
262
+ [:assumed_charge, 1], [:index, "1"],
263
+ ])
264
+ sh = spec.search_results.first.search_hits.first
265
+ assert_equal_pairs_swapped(sh, [
266
+ # normal attributes
267
+ [:hit_rank, 1],
268
+ [:peptide, "SIYFRNFK"],
269
+ [:peptide_prev_aa, "R"],
270
+ [:peptide_next_aa, "G"],
271
+ [:protein, "gi|16130084|ref|NP_416651.1|"],
272
+ [:num_tot_proteins, 1],
273
+ [:num_matched_ions, 4],
274
+ [:tot_num_ions, 14],
275
+ #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
276
+ [:calc_neutral_pep_mass, 1074.23261], # mine
277
+ #[:massdiff, "+0.400000"], # out2summary
278
+ [:massdiff, 0.434316000000081], # mine
279
+ [:num_tol_term, 2], [:num_missed_cleavages, 1], [:is_rejected, 0],
280
+
281
+ # search_score
282
+ [:xcorr, 0.4], [:deltacn, 0.023], [:deltacnstar, "0"], [:spscore, 78.8], [:sprank, 1],
283
+ ])
284
+
285
+ spec = sq[1]
286
+ assert_equal_pairs_swapped(spec, [
287
+ [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
288
+ [:precursor_neutral_mass, 663.206111], # mine
289
+ [:assumed_charge, 1], [:index, "2"],
290
+ ])
291
+
292
+ sh = spec.search_results.first.search_hits.first
293
+ assert_equal_pairs_swapped(sh, [
294
+ # normal attributes
295
+ [:hit_rank, 1], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 10],
296
+ [:num_tol_term, 2], [:num_missed_cleavages, 0], [:is_rejected, 0],
297
+ #[:massdiff, "-0.600000"], # out2summary
298
+ [:massdiff, -0.556499000000031], # mine
299
+ #[:calc_neutral_pep_mass, 663.7920], # out2summary
300
+ [:calc_neutral_pep_mass, 663.76261], # mine
301
+
302
+ # search_score
303
+ [:xcorr, 0.965], [:deltacn, 0.132], [:deltacnstar, "0"], [:spscore, 81.1], [:sprank, 1],
304
+ ])
305
+
306
+ spec = sq[9]
307
+ assert_equal_pairs_swapped(spec, [
308
+ [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, 2],
309
+ #[:precursor_neutral_mass, "691.0920"], # out2summary
310
+ [:precursor_neutral_mass, 691.150992], # mine
311
+ ])
312
+
313
+ sh = spec.search_results.first.search_hits.first
314
+ assert_equal_pairs_swapped(sh, [
315
+ # normal attributes
316
+ [:hit_rank, 1], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, 1], [:num_matched_ions, 5], [:tot_num_ions, 8], [:num_tol_term, 2],
317
+
318
+ #[:num_missed_cleavages, "0"], # out2summary misses this!
319
+ [:num_missed_cleavages, 1],
320
+ [:is_rejected, 0],
321
+ #[:calc_neutral_pep_mass, "691.7920"], # out2summary
322
+ [:calc_neutral_pep_mass, 691.82261], # mine
323
+ #[:massdiff, "-0.700000"], # out2summary
324
+ [:massdiff, -0.67161800000008], # mine
325
+
326
+ # search_score
327
+ [:xcorr, 0.903], [:deltacn, 0.333], [:deltacnstar, "0"], [:spscore, 172.8], [:sprank, 1],
328
+ ])
329
+ end
330
+
331
+ it 'can generate correct pepxml file' do
332
+
333
+ ## IF OUR OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
334
+ string = @pepxml_objs.first.to_pepxml
335
+ ans_lines = IO.read(Tfiles + "/opd1/000.my_answer.100lines.xml").split("\n")
336
+ base_name_re = /base_name=".*?files\//o
337
+ date_re = /date=".*?"/
338
+ string.split("\n").each_with_index do |line,i|
339
+ if i > 99 ; break end
340
+ ans, exp =
341
+ if i == 1
342
+ [line.sub(date_re,''), ans_lines[i].sub(date_re,'')]
343
+ elsif i == 2
344
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t")]
345
+ elsif i == 6
346
+ [line.sub(base_name_re,''), ans_lines[i].sub(base_name_re, '').sub(/^\s+/, "\t\t")]
347
+ else
348
+ [line, ans_lines[i]]
349
+ end
350
+
351
+ #ans.split('').zip(exp.split('')) do |l,a|
352
+ # if l != a
353
+ # puts line
354
+ # puts ans_lines[i]
355
+ # puts l
356
+ # puts a
357
+ # end
358
+ #end
359
+ if ans != exp
360
+ puts ans
361
+ puts exp
362
+ end
363
+ ans.should == exp
364
+ #line.sub(base_name_re,'').should == ans_lines[i].sub(base_name_re,'')
365
+ end
366
+ end
367
+ end
368
+ end
369
+
370
+
371
+
372
+ describe Sequest::Pepxml::Modifications do
373
+ before(:each) do
374
+ tf_params = Tfiles + "/bioworks32.params"
375
+ @params = Sequest::Params.new(tf_params)
376
+ # The params object here is completely unnecessary for this test, except
377
+ # that it sets up the mass table
378
+ @obj = Sequest::Pepxml::Modifications.new(@params, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
379
+ end
380
+ it 'creates a mod_symbols_hash' do
381
+ answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
382
+ @obj.mod_symbols_hash.should == answ
383
+ ## need more here
384
+ end
385
+
386
+ it 'creates a ModificationInfo object given a special peptide sequence' do
387
+ mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
388
+ @params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
389
+ @params.term_diff_search_options = "14.20000 12.33000"
390
+ mod = Sequest::Pepxml::Modifications.new(@params, mod_string)
391
+ ## no mods
392
+ peptide = "PEPTIDE"
393
+ mod.modification_info(peptide).should be_nil
394
+ peptide = "]M*EC^S@IDM#M*EMSCM["
395
+ modinfo = mod.modification_info(peptide)
396
+ modinfo.modified_peptide.should == peptide
397
+ modinfo.mod_nterm_mass.should be_close(146.40054, 0.000001)
398
+ modinfo.mod_cterm_mass.should be_close(160.52994, 0.000001)
399
+ end
400
+
401
+ end
402
+
403
+ describe Sequest::Pepxml::SearchHit::ModificationInfo do
404
+
405
+ before(:each) do
406
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
407
+ Sequest::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(ar)
408
+ end
409
+ hash = {
410
+ :mod_nterm_mass => 520.2,
411
+ :modified_peptide => "MOD*IFI^E&D",
412
+ :mod_aminoacid_masses => modaaobjs,
413
+ }
414
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
415
+ @obj = Sequest::Pepxml::SearchHit::ModificationInfo.new(hash)
416
+ end
417
+
418
+ def _re(st)
419
+ /#{Regexp.escape(st)}/
420
+ end
421
+
422
+ it 'can produce pepxml' do
423
+ answ = @obj.to_pepxml
424
+ answ.should =~ _re('<modification_info')
425
+ answ.should =~ _re(" mod_nterm_mass=\"520.2\"")
426
+ answ.should =~ _re(" modified_peptide=\"MOD*IFI^E&amp;D\"")
427
+ answ.should =~ _re("<mod_aminoacid_mass")
428
+ answ.should =~ _re(" position=\"3\"")
429
+ answ.should =~ _re(" mass=\"150.3\"")
430
+ answ.should =~ _re(" position=\"6\"")
431
+ answ.should =~ _re(" mass=\"345.2\"")
432
+ answ.should =~ _re("</modification_info>")
433
+ end
434
+ end
435
+
436
+ =end
@@ -0,0 +1,40 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+
4
# Large-file specs are opt-in through the SPEC_LARGE environment variable; when
# it is set, the :development_large Bundler group is activated in place of
# :development.
$spec_large = ENV['SPEC_LARGE']
bundler_group = if $spec_large then :development_large else :development end

begin
  Bundler.setup(:default, bundler_group)
rescue Bundler::BundlerError => error
  # Report the failure with a hint, then exit with the status code carried by
  # the Bundler error.
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end
14
+ require 'spec/more'
15
+
16
+
17
# Deferred loader for the external test-data library: 'ms/testdata' is only
# required, and SEQUEST_DIR only defined, when $spec_large is set.
load_testdata = lambda {
  require 'ms/testdata'
  SEQUEST_DIR = Ms::TESTDATA + '/sequest'
}

if $spec_large
  load_testdata.call
end
23
+
24
# Put the spec directory and then the sibling lib directory at the front of
# the load path (lib is unshifted last, so it ends up first), then turn on
# Bacon's summary_on_exit.
spec_dir = File.dirname(__FILE__)
[spec_dir, File.join(spec_dir, '..', 'lib')].each { |dir| $LOAD_PATH.unshift(dir) }

Bacon.summary_on_exit
28
+
29
+
30
# Runs the given block of large-file specs only when $spec_large is set.
# Otherwise registers a single empty spec whose description says the
# large-file specs were skipped.
#
# Requires SPEC_LARGE=true and tfiles_large dir for testing large test files
def spec_large(&block)
  return block.call if $spec_large

  it('SKIPPING (not testing large files)') { }
end
39
+
40
# Directory holding the small test fixtures, located next to this helper.
TESTFILES = "#{File.dirname(__FILE__)}/tfiles"