mspire 0.3.9 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
@@ -2,13 +2,15 @@
2
2
  require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
3
  require 'ms/msrun'
4
4
  require 'ostruct'
5
+ require 'fileutils'
6
+ require 'ms/parser/mzxml'
5
7
 
6
8
  parsers = %w(AXML LibXML XMLParser Regexp REXML)
7
9
 
8
10
  XMLStyleParser::Parser_precedence.replace( %w(AXML) )
9
11
 
10
12
 
11
- describe "an msrun with basic, non-spectral information", :shared => true do
13
+ shared_examples_for "an msrun with basic, non-spectral information" do
12
14
  it 'knows the type and version of file' do
13
15
  @run.filetype.should == @info.filetype
14
16
  @run.version.should == @info.version
@@ -37,12 +39,11 @@ describe "an msrun with basic, non-spectral information", :shared => true do
37
39
  it 'has correct first two scans and last scan' do
38
40
  [0,1,-1].each do |i|
39
41
  @info.scans[i].each do |k,v|
40
- if k == :precursors
41
- v.zip( @run.scans[i].send(k) ) do |exp, act|
42
- act.mz.should be_close(exp.mz, 0.000001)
43
- #if act.intensity # intensity not guaranteed to exist!
44
- # act.intensity.should == exp.intensity
45
- #end
42
+ testing = @run.scans[i].send(k)
43
+ if k == :precursor
44
+ testing.mz.should be_close(v.mz, 0.000001)
45
+ if testing.intensity # intensity not guaranteed to exist!
46
+ testing.intensity.should == v.intensity
46
47
  end
47
48
  else
48
49
  @run.scans[i].send(k).should == v
@@ -52,7 +53,7 @@ describe "an msrun with basic, non-spectral information", :shared => true do
52
53
  end
53
54
  end
54
55
 
55
- describe "an msrun with spectrum", :shared => true do
56
+ shared_examples_for "an msrun with spectrum" do
56
57
 
57
58
  it 'has all scans with spectrum data' do
58
59
  @run.scans.size.should == @info.scan_count
@@ -68,26 +69,27 @@ describe "an msrun with spectrum", :shared => true do
68
69
  @run.start_and_end_mz(2).should == @info.start_and_end_mz2
69
70
  end
70
71
 
72
+ it_should_behave_like "an msrun with basic, non-spectral information"
73
+ end
74
+
75
+ # some xml formats have precursor intensities built in, some do not
76
+ shared_examples_for "an msrun with precursor intensities" do
77
+
71
78
  it "has correct prec inten for first two scans and last scan" do
72
79
  [0,1,-1].each do |i|
73
80
  if i == 0
74
- # currently we do diff't things for ms_level 1 scans! is it nil or []
75
- #@run.scans[i].precursors.should == []
76
- #@run.scans[i].precursors.should be_nil
81
+ @run.scans[i].precursor.should be_nil
77
82
  next
78
83
  end
79
- expected = @info.scans[i][:precursors]
80
- @run.scans[i].precursors.zip(expected) do |act,exp|
81
- act.mz.should be_close(exp.mz, 0.000001)
82
- act.intensity.should == exp.intensity
83
- end
84
+ expected = @info.scans[i][:precursor]
85
+ @run.scans[i].precursor.mz.should be_close(expected.mz, 0.000001)
86
+ @run.scans[i].precursor.intensity.should == expected.intensity
84
87
  end
85
88
  end
86
89
 
87
- it_should_behave_like "an msrun with basic, non-spectral information"
88
90
  end
89
91
 
90
- describe 'a basic scan info generator', :shared => true do
92
+ shared_examples_for 'a basic scan info generator' do
91
93
 
92
94
  def check_table(table, answer)
93
95
  answer.each do |k,v|
@@ -143,13 +145,13 @@ MzXML_version_1_info = MyOpenStruct.new do |info|
143
145
  :num => 2,
144
146
  :ms_level => 2,
145
147
  :time => 1.90,
146
- :precursors => [MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)]
148
+ :precursor => MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)
147
149
  }
148
150
  info.scans[-1] = {
149
151
  :num => 3748,
150
152
  :ms_level => 2,
151
153
  :time => 5102.55,
152
- :precursors => [MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)]
154
+ :precursor => MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)
153
155
  }
154
156
  info.scan_count0 = info.scan_count
155
157
  info.scan_count1 = 937
@@ -163,7 +165,7 @@ describe MS::MSRun, "on mzXML version 1 files (w/o spectra)" do
163
165
  before(:all) do
164
166
  @info = MzXML_version_1_info
165
167
  start = Time.now
166
- @run = @info.klass.new(@info.file, :spectra => false)
168
+ @run = @info.klass.new(@info.file, :lazy => :no_spectra)
167
169
  puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
168
170
  end
169
171
  it_should_behave_like "an msrun with basic, non-spectral information"
@@ -176,34 +178,35 @@ describe MS::MSRun, "on mzXML version 1 files (w/spectra)" do
176
178
  before(:all) do
177
179
  @info = MzXML_version_1_info
178
180
  start = Time.now
179
- @run = @info.klass.new(@info.file)
181
+ @run = @info.klass.new(@info.file, :lazy => :not)
180
182
  puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
181
183
  end
182
184
 
183
185
  it_should_behave_like "an msrun with spectrum"
186
+ it_should_behave_like "an msrun with precursor intensities"
184
187
  it_should_behave_like 'a basic scan info generator'
185
188
  end
186
189
  end
187
190
 
188
191
  MzXML_version_20_info = MyOpenStruct.new do |info|
189
- info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
192
+ info.file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
190
193
  info.klass = MS::MSRun
191
194
  info.filetype = :mzxml
192
195
  info.version = '2.0'
193
- info.scan_count = 3620
196
+ info.scan_count = 20
194
197
  #info.scan_counts = ??
195
198
  info.start_time = 0.13
196
- info.end_time = 5099.69
199
+ info.end_time = 27.31
197
200
  info.num_to_prec_mz_hash = {
198
201
  0 => nil,
199
202
  1 => nil,
200
203
  2 => 390.9291992,
201
204
  3 => 1121.944824,
202
205
  4 => 1321.913574,
203
- 3617 => nil,
204
- 3618 => 828.2867432,
205
- 3619 => 424.8538208,
206
- 3620 => 357.0411987,
206
+ 17 => nil,
207
+ 18 => 308.795959,
208
+ 19 => 444.983337,
209
+ 20 => 361.671875,
207
210
  }
208
211
  info.scans = {}
209
212
  info.scans[0]= {
@@ -215,61 +218,80 @@ MzXML_version_20_info = MyOpenStruct.new do |info|
215
218
  :num => 2,
216
219
  :ms_level => 2,
217
220
  :time => 1.49,
218
- :precursors => [MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)]
221
+ :precursor => MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)
219
222
  }
220
223
  info.scans[-1] = {
221
- :num => 3620,
224
+ :num => 20,
222
225
  :ms_level => 2,
223
- :time => 5099.69,
224
- :precursors => [MS::Precursor.new(:mz => 357.0411987, :intensity => 643017.0)]
226
+ :time => 27.31,
227
+ :precursor => MS::Precursor.new(:mz => 361.671875, :intensity => 572148.0)
225
228
  }
226
229
  info.scan_count0 = info.scan_count
227
- info.scan_count1 = 905
228
- info.scan_count2 = 2715
229
- info.start_and_end_mz1 = [300.0, 1500.0]
230
+ info.scan_count1 = 5
231
+ info.scan_count2 = 15
232
+ info.start_and_end_mz1 = [300.0, 1499] # apparently nothing brushes right up to 1500
230
233
  # that first number on start_and_end_mz2 is a arbitrary as to accuracy...
231
234
  # I'm not sure the correct answer
232
- info.start_and_end_mz2 = [110.0, 2000.0]
235
+ info.start_and_end_mz2 = [110.0, 1955] ## again, this is based on data
233
236
  end
234
237
 
235
238
  describe MS::MSRun, "on mzXML version 2.0 files (w/o spectra)" do
236
- spec_large do
237
- before(:all) do
238
- @info = MzXML_version_20_info
239
- start = Time.now
240
- @run = @info.klass.new(@info.file, :spectra => false)
241
- puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
242
- end
239
+ before(:all) do
240
+ @info = MzXML_version_20_info
241
+ start = Time.now
242
+ @run = @info.klass.new(@info.file, :lazy => :no_spectra)
243
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
244
+ end
243
245
 
244
- it_should_behave_like "an msrun with basic, non-spectral information"
245
- it_should_behave_like 'a basic scan info generator'
246
+ #it_should_behave_like "an msrun with basic, non-spectral information"
247
+ it_should_behave_like 'a basic scan info generator'
248
+
249
+ it 'fixes bad scan tags on the fly!' do
250
+ # if this test works, this is true
246
251
  end
247
252
  end
248
253
 
249
- describe MS::MSRun, "on mzXML version 2.0 files (w/spectra)" do
250
- spec_large do
251
- before(:all) do
252
- @info = MzXML_version_20_info
253
- start = Time.now
254
- @run = @info.klass.new(@info.file)
255
- puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
256
- end
254
+ shared_examples_for "an mzXML version 2.0 file (w/spectra)" do
255
+ before(:all) do
256
+ @info = MzXML_version_20_info
257
+ # first fix the file of bad scan tags
258
+ @info.file
259
+ #start = Time.now
260
+ @fh = File.open(@info.file)
261
+ @run = @info.klass.new(@fh, :lazy => @lazy_type)
262
+ #puts "- read #{File.basename(@new_file)} in #{Time.now - start} seconds" if $specdoc
263
+ end
257
264
 
258
- it_should_behave_like "an msrun with spectrum"
259
- it_should_behave_like 'a basic scan info generator'
265
+ after(:all) do
266
+ # @fh.close # not sure why, but the filehandle is already closed!
267
+ # Maybe because the filehandle went out of scope??
260
268
  end
269
+
270
+ it_should_behave_like "an msrun with spectrum" # <- trouble
271
+ it_should_behave_like 'a basic scan info generator'
261
272
  end
262
273
 
274
+ #[:io, :string, :not].each do |lazy_type|
275
+ [:io, :string, :not].each do |lazy_type|
276
+ describe MS::MSRun, "mzXML v2.0 with :lazy => :#{lazy_type}" do
277
+ before(:all) { @lazy_type = lazy_type}
278
+ it_should_behave_like "an mzXML version 2.0 file (w/spectra)"
279
+ end
280
+
281
+ end
282
+
263
283
  Mzdata_105_info = MyOpenStruct.new do |info|
264
- info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.mzData.xml'
284
+ info.file = Tfiles + '/opd1_2runs_2mods/data/020.mzData.xml'
265
285
  info.klass = MS::MSRun
266
286
  info.filetype = :mzdata
267
287
  info.version = '1.05'
268
- info.scan_count = 3619 # this should be 3620, they drop the last scan
288
+ # NOTE that the real file drops the last scan!! giving a mismatch
289
+ info.scan_count = 20
269
290
  info.start_time = 0.13002 # minutes == 0.00216667
270
291
  # This is the correct one!, but Thermo drops last scan
271
292
  # info.end_time = 5099.688 #84.9948
272
- info.end_time = 84.968500*60 # 5098.11
293
+ #info.end_time = 84.968500*60 # 5098.11
294
+ info.end_time = 0.455167 * 60
273
295
 
274
296
  info.num_to_prec_mz_hash = {
275
297
  0 => nil,
@@ -277,9 +299,10 @@ Mzdata_105_info = MyOpenStruct.new do |info|
277
299
  2 => 390.9291992,
278
300
  3 => 1121.944824,
279
301
  4 => 1321.913574,
280
- 3617 => nil,
281
- 3618 => 828.2867432,
282
- 3619 => 424.8538208,
302
+ 17 => nil,
303
+ 18 => 308.795959,
304
+ 19 => 444.983337,
305
+ 20 => 361.671875,
283
306
  # 3620 => 357.0411987, Bioworks 3.3 is broken
284
307
  }
285
308
 
@@ -293,105 +316,124 @@ Mzdata_105_info = MyOpenStruct.new do |info|
293
316
  :num => 2,
294
317
  :ms_level => 2,
295
318
  :time => 0.024833 * 60, # 1.48998
296
- :precursors => [MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006) ],
319
+ :precursor => MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006),
297
320
  }
298
321
  info.scans[-1] = {
299
- :num => 3619,
322
+ :num => 20,
300
323
  :ms_level => 2,
301
324
  #:time => 5099.69,
302
- :time => 84.968500 * 60, # 5098.11
325
+ #:time => 84.968500 * 60, # 5098.11
326
+ :time => 0.455167 * 60,
303
327
 
304
- :precursors => [MS::Precursor.new( :mz => 424.853821, :intensity => 738590.0 )] # wrong
328
+ :precursor => MS::Precursor.new( :mz => 361.671875, :intensity => 572148.0 ) # wrong
305
329
  }
306
330
  info.scan_count0 = info.scan_count
307
- info.scan_count1 = 905
308
- info.scan_count2 = 2714 # should be 2715, they dropped the last scan!
331
+ info.scan_count1 = 5
332
+ info.scan_count2 = 15 # should be 2715, they dropped the last scan!
309
333
  info.start_and_end_mz1 = [300.0, 1500.0]
310
334
  # This is the Correct one!!!, but Thermo drops last scan
311
335
  #info.start_and_end_mz2 = [112.0, 2000.0]
312
- info.start_and_end_mz2 = [95.0, 2000.0]
336
+ info.start_and_end_mz2 = [95.0, 1955]
313
337
  end
314
338
 
315
339
  describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/o spectra)" do
316
- spec_large do
317
- before(:all) do
318
- @info = Mzdata_105_info
319
- start = Time.now
320
- @run = @info.klass.new(@info.file, :spectra => false)
321
- puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
322
- puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
323
- end
340
+ before(:all) do
341
+ @info = Mzdata_105_info
342
+ start = Time.now
343
+ @run = @info.klass.new(@info.file, :lazy => :no_spectra)
344
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
345
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
346
+ end
324
347
 
325
- it_should_behave_like "an msrun with basic, non-spectral information"
326
- it_should_behave_like 'a basic scan info generator'
348
+ it_should_behave_like "an msrun with basic, non-spectral information"
349
+ it_should_behave_like 'a basic scan info generator'
327
350
 
328
- end
329
351
  end
330
352
 
331
- describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/spectra)" do
332
- spec_large do
333
- before(:all) do
334
- @info = Mzdata_105_info
335
- start = Time.now
336
- @run = @info.klass.new(@info.file)
337
- puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
338
- puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
339
- end
353
+ shared_examples_for "mzData v1.05 (Bioworks3.3) (w/spectra)" do
354
+ before(:all) do
355
+ @info = Mzdata_105_info
356
+ #start = Time.now
357
+ @fh = File.open(@info.file)
358
+ @run = @info.klass.new(@fh, :lazy => @lazy_type)
359
+ #puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
360
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
361
+ end
340
362
 
341
- it_should_behave_like "an msrun with spectrum"
342
- it_should_behave_like 'a basic scan info generator'
363
+ it_should_behave_like "an msrun with spectrum"
364
+ it_should_behave_like 'a basic scan info generator'
343
365
 
344
- it 'gets correct precursor intensities for all scans' do
345
- check_file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
346
- prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
347
- if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
348
- [$1.to_f, $2.to_f]
349
- else
350
- abort "didn't match for some crazy reason! (probably newline issues)"
351
- end
366
+ it 'has (or can get) correct precursor intensities for all scans' do
367
+ check_file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
368
+ prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
369
+ if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
370
+ [$1.to_f, $2.to_f]
371
+ else
372
+ abort "didn't match for some crazy reason! (probably newline issues)"
352
373
  end
374
+ end
353
375
 
354
- prec_mz_cnt = 0
355
- @run.scans.each_with_index do |scan,i|
356
- next if i % 4 == 0
357
- (exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
376
+ prec_mz_cnt = 0
377
+ @run.scans.each_with_index do |scan,i|
378
+ next if i % 4 == 0
379
+ (exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
358
380
 
359
- precursor = scan.precursors.first
360
- precursor.mz.should be_close(exp_mz, 0.00001)
361
- precursor.intensity.should be_close(exp_int, 51)
381
+ precursor = scan.precursor
382
+ precursor.mz.should be_close(exp_mz, 0.00001)
383
+ prec_inten =
384
+ if precursor.intensity.nil?
385
+ precursor.parent.spectrum.intensity_at_mz(precursor.mz)
386
+ else
387
+ precursor.intensity
388
+ end
389
+ prec_inten.should be_close(exp_int, 51)
362
390
 
363
- prec_mz_cnt += 1
364
- end
391
+ prec_mz_cnt += 1
392
+ end
365
393
  end
366
394
  end
395
+
396
+ #[:string, :not, :io].each do |lazy_type|
397
+ [:io].each do |lazy_type|
398
+ describe MS::MSRun, "mzData v1.05 with :lazy => :#{lazy_type}" do
399
+ before(:all) { @lazy_type = lazy_type}
400
+ it_should_behave_like "mzData v1.05 (Bioworks3.3) (w/spectra)"
401
+ end
367
402
  end
368
403
 
369
- describe MS::MSRun, 'with small file of twenty scans' do
370
- before(:each) do
404
+ describe 'reading a small file of twenty scans' do
405
+ before(:all) do
371
406
  @file = Tfiles + "/opd1/twenty_scans.mzXML"
372
- @msrun = MS::MSRun.new(@file)
373
407
  end
374
408
 
375
- it 'retrieves times and spectra' do
376
- (times, spectra) = @msrun.times_and_spectra(1)
377
- etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
378
- num_peaks = [992, 814, 796, 849, 813]
379
- tol = 0.000000001
380
- spectra[0].mz[1].should be_close(301.430114746094, tol)
381
- spectra[0].intensity[1].should be_close(22192.0, tol)
382
- spectra[0].mz[-1].should be_close(1499.09912109375, tol)
383
- spectra[0].intensity[-1].should be_close(111286.0, tol)
384
-
385
- spectra[-1].mz[1].should be_close(301.243774414062, tol)
386
- spectra[-1].intensity[1].should be_close(77503.0, tol)
387
- spectra[-1].mz[-1].should be_close(1499.42016601562, tol)
388
- spectra[-1].intensity[-1].should be_close(13.0, tol)
389
-
390
- num_peaks.each_with_index do |n,i|
391
- spectra[i].mz.size.should == n
392
- end
393
- etimes.each_with_index do |t,i|
394
- times[i].should be_close(t, 0.00001)
409
+ it 'retrieves times and spectra with all lazy types' do
410
+ [:not, :string, :io].each do |lazy_type|
411
+
412
+ File.open(@file) do |io|
413
+ msrun = MS::MSRun.new(io, :lazy => lazy_type)
414
+
415
+
416
+ (times, spectra) = msrun.times_and_spectra(1)
417
+ etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
418
+ num_peaks = [992, 814, 796, 849, 813]
419
+ tol = 0.000000001
420
+ spectra[0].mzs[1].should be_close(301.430114746094, tol)
421
+ spectra[0].intensities[1].should be_close(22192.0, tol)
422
+ spectra[0].mzs[-1].should be_close(1499.09912109375, tol)
423
+ spectra[0].intensities[-1].should be_close(111286.0, tol)
424
+
425
+ spectra[-1].mzs[1].should be_close(301.243774414062, tol)
426
+ spectra[-1].intensities[1].should be_close(77503.0, tol)
427
+ spectra[-1].mzs[-1].should be_close(1499.42016601562, tol)
428
+ spectra[-1].intensities[-1].should be_close(13.0, tol)
429
+
430
+ num_peaks.each_with_index do |n,i|
431
+ spectra[i].mzs.size.should == n
432
+ end
433
+ etimes.each_with_index do |t,i|
434
+ times[i].should be_close(t, 0.00001)
435
+ end
436
+ end
395
437
  end
396
438
  end
397
439
  end
@@ -412,9 +454,10 @@ describe MS::MSRun, 'with a small set of scans' do
412
454
  precs = (0..(vals.size)).to_a.map do |x|
413
455
  MS::Precursor.new([x,100])
414
456
  end
457
+
415
458
  scans = vals.zip(precs).map do |ar,prec|
416
459
  scan = MS::Scan.new(ar)
417
- scan.precursors = [prec]
460
+ scan.precursor = prec
418
461
  scan
419
462
  end
420
463
  scans.size.should == vals.size
@@ -422,7 +465,7 @@ describe MS::MSRun, 'with a small set of scans' do
422
465
  parents = [nil,s[0],s[0],s[2],s[2],nil,s[5],s[6],s[5]]
423
466
  MS::MSRun.add_parent_scan(scans)
424
467
  scans.each_with_index do |scan,i|
425
- scan.precursors.first.parent.should == parents[i]
468
+ scan.precursor.parent.should == parents[i]
426
469
  end
427
470
  end
428
471
  end
@@ -22,7 +22,7 @@ describe "a MS::Parser on a file", :shared => true do
22
22
 
23
23
  ########################################################################
24
24
  # NOTE: methods to verify parsing of information should be defined where
25
- # that information is require.
25
+ # that information is required.
26
26
  # e.g. msrun_spec.rb will verify that msrun objects are created properly.
27
27
  # this is because we don't care how we get that file, just that we get it.
28
28
  # The whole process of parsing a file should be transparent to users.
@@ -71,7 +71,7 @@ describe MS::Parser, "on an mzXML version 2 file" do
71
71
  @version = '2.0'
72
72
  @filetype_version = [@filetype, @version]
73
73
  @subclass = 'MS::Parser::MzXML'
74
- @file = Tfiles_large + '/opd1_2runs_2mods/data/020.readw.mzXML'
74
+ @file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
75
75
  end
76
76
  it_should_behave_like "a MS::Parser on a file"
77
77
  end
@@ -84,7 +84,7 @@ describe MS::Parser, "on an mzData version 1.05 file" do
84
84
  @version = '1.05'
85
85
  @filetype_version = [@filetype, @version]
86
86
  @subclass = 'MS::Parser::MzData'
87
- @file = Tfiles_large + '/opd1_2runs_2mods/data/020.mzData.xml'
87
+ @file = Tfiles + '/opd1_2runs_2mods/data/020.mzData.xml'
88
88
  end
89
89
  it_should_behave_like "a MS::Parser on a file"
90
90
  end
@@ -59,8 +59,6 @@ describe MS::Spectrum, 'of full values' do
59
59
  @spec.index(1029.212891).should == 597
60
60
  end
61
61
 
62
-
63
-
64
62
  end
65
63
 
66
64
 
@@ -146,7 +146,7 @@ describe 'filtering on a real srf file' do
146
146
  spec_large do
147
147
  it 'does tmm with a toppred file on srf' do
148
148
  opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false}}
149
- dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
149
+ dir = Tfiles_l + '/opd1_2runs_2mods/sequest33'
150
150
  tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
151
151
  fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
152
152
  sequest_file = dir + '/ecoli.params'
@@ -219,6 +219,9 @@ describe SpecID::Precision::Filter::Peps do
219
219
  [1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
220
220
  [1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
221
221
  [1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
222
+ ##
223
+ [1.0, 1.0, 1.0, 0.05, 60, true] => 6, ## testing ppm filtering:
224
+ [1.0, 1.0, 1.0, 0.05, 10, true] => 0,
222
225
  }
223
226
  args_and_expected.each do |args,exp|
224
227
  filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
@@ -28,14 +28,14 @@ describe 'finding precision Proph::Prot::Pep objects' do
28
28
 
29
29
  it 'runs without any validator' do
30
30
  answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
31
- answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits_precision", "probabilities"]
31
+ answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits", "pephits_precision", "probabilities"]
32
32
  answer[:aaseqs].should == %w(3 0 4 1 2)
33
33
  end
34
34
 
35
35
  it 'returns modified peptides if any modified peptides' do
36
36
  @spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
37
37
  answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
38
- answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits_precision", "probabilities"]
38
+ answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits", "pephits_precision", "probabilities"]
39
39
  end
40
40
 
41
41
  end
@@ -309,7 +309,7 @@ describe 'bioworks file with modifications transformed into pepxml' do
309
309
 
310
310
  spec_large do
311
311
  before(:all) do
312
- modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest/'
312
+ modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
313
313
  modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
314
314
  @srgfile = modfiles_sequest_dir + 'tmp.srg'
315
315
  @out_path = modfiles_sequest_dir + 'pepxml'
@@ -110,7 +110,7 @@ describe 'converting a large srf to sqt' do
110
110
  del(@output)
111
111
  end
112
112
  it 'can get db info with correct path' do
113
- @srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest')
113
+ @srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33')
114
114
  @output.exist_as_a_file?.should be_true
115
115
  lines = IO.readlines(@output)
116
116
  has_md5 = lines.any? do |line|
@@ -127,8 +127,8 @@ describe 'converting a large srf to sqt' do
127
127
  del(@output)
128
128
  end
129
129
  it 'can update the Database' do
130
- @srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest', :update_db_path => true)
131
- regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest/ecoli_K12_ncbi_20060321.fasta")
130
+ @srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33', :update_db_path => true)
131
+ regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
132
132
  updated_db = IO.readlines(@output).any? do |line|
133
133
  line =~ regexp
134
134
  end
@@ -217,8 +217,8 @@ end
217
217
  describe SQTGroup, ': acting as a SpecID on large files' do
218
218
  spec_large do
219
219
  before(:each) do
220
- file1 = Tfiles_l + '/opd1_2runs_2mods/sequest/020.sqt'
221
- file2 = Tfiles_l + '/opd1_2runs_2mods/sequest/040.sqt'
220
+ file1 = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.sqt'
221
+ file2 = Tfiles_l + '/opd1_2runs_2mods/sequest33/040.sqt'
222
222
  file1.exist_as_a_file?.should be_true
223
223
  file2.exist_as_a_file?.should be_true
224
224
  @sqg = SQTGroup.new([file1, file2])