mspire 0.3.9 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +24 -7
- data/README +15 -13
- data/README.rdoc +18 -0
- data/Rakefile +50 -14
- data/bin/aafreqs.rb +0 -0
- data/bin/bioworks2excel.rb +0 -0
- data/bin/bioworks_to_pepxml.rb +2 -1
- data/bin/bioworks_to_pepxml_gui.rb +0 -0
- data/bin/fasta_shaker.rb +0 -0
- data/bin/filter_and_validate.rb +0 -0
- data/bin/gi2annot.rb +0 -0
- data/bin/id_class_anal.rb +0 -0
- data/bin/id_precision.rb +0 -0
- data/bin/ms_to_lmat.rb +0 -0
- data/bin/pepproph_filter.rb +0 -0
- data/bin/protein_summary.rb +0 -0
- data/bin/protxml2prots_peps.rb +0 -0
- data/bin/raw_to_mzXML.rb +3 -3
- data/bin/run_percolator.rb +122 -0
- data/bin/sqt_group.rb +0 -0
- data/bin/srf_group.rb +0 -0
- data/changelog.txt +29 -0
- data/lib/ms/gradient_program.rb +0 -1
- data/lib/ms/msrun.rb +62 -29
- data/lib/ms/parser/mzdata/axml.rb +55 -0
- data/lib/ms/parser/mzdata/dom.rb +51 -36
- data/lib/ms/parser/mzdata.rb +8 -2
- data/lib/ms/parser/mzxml/axml.rb +59 -0
- data/lib/ms/parser/mzxml/dom.rb +80 -57
- data/lib/ms/parser/mzxml/hpricot.rb +1 -1
- data/lib/ms/parser/mzxml/libxml.rb +6 -2
- data/lib/ms/parser/mzxml.rb +110 -3
- data/lib/ms/parser.rb +4 -4
- data/lib/ms/precursor.rb +19 -4
- data/lib/ms/scan.rb +7 -7
- data/lib/ms/spectrum.rb +249 -58
- data/lib/mspire.rb +1 -1
- data/lib/spec_id/bioworks.rb +2 -2
- data/lib/spec_id/precision/filter/cmdline.rb +8 -1
- data/lib/spec_id/precision/prob/cmdline.rb +2 -2
- data/lib/spec_id/precision/prob.rb +1 -0
- data/lib/spec_id/proph/pep_summary.rb +3 -4
- data/lib/spec_id/proph/prot_summary.rb +3 -3
- data/lib/spec_id/protein_summary.rb +1 -1
- data/lib/spec_id/sequest/pepxml.rb +5 -5
- data/lib/spec_id/sqt.rb +4 -4
- data/lib/spec_id/srf.rb +49 -8
- data/lib/spec_id.rb +5 -0
- data/lib/xml_style_parser.rb +16 -2
- data/script/compile_and_plot_smriti_final.rb +0 -0
- data/script/create_little_pepxml.rb +0 -0
- data/script/degenerate_peptides.rb +0 -0
- data/script/estimate_fpr_by_cysteine.rb +0 -0
- data/script/extract_gradient_programs.rb +1 -1
- data/script/find_cysteine_background.rb +0 -0
- data/script/genuine_tps_and_probs.rb +0 -0
- data/script/get_apex_values_rexml.rb +0 -0
- data/script/mascot_fix_pepxml.rb +123 -0
- data/script/msvis.rb +0 -0
- data/script/mzXML2timeIndex.rb +0 -0
- data/script/peps_per_bin.rb +0 -0
- data/script/prep_dir.rb +0 -0
- data/script/simple_protein_digestion.rb +0 -0
- data/script/smriti_final_analysis.rb +0 -0
- data/script/sqt_to_meta.rb +0 -0
- data/script/top_hit_per_scan.rb +0 -0
- data/script/toppred_to_yaml.rb +0 -0
- data/script/tpp_installer.rb +0 -0
- data/specs/bin/prob_validate_spec.rb +5 -2
- data/specs/bin/protein_summary_spec.rb +5 -1
- data/specs/ms/msrun_spec.rb +176 -133
- data/specs/ms/parser_spec.rb +3 -3
- data/specs/ms/spectrum_spec.rb +0 -2
- data/specs/spec_id/precision/filter_spec.rb +4 -1
- data/specs/spec_id/precision/prob_spec.rb +2 -2
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
- data/specs/spec_id/sqt_spec.rb +5 -5
- data/specs/spec_id/srf_spec.rb +56 -93
- data/specs/spec_id/srf_spec_helper.rb +121 -284
- data/specs/spec_id_spec.rb +3 -0
- data/specs/transmem/toppred_spec.rb +1 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
- metadata +247 -229
data/specs/ms/msrun_spec.rb
CHANGED
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
3
3
|
require 'ms/msrun'
|
|
4
4
|
require 'ostruct'
|
|
5
|
+
require 'fileutils'
|
|
6
|
+
require 'ms/parser/mzxml'
|
|
5
7
|
|
|
6
8
|
parsers = %w(AXML LibXML XMLParser Regexp REXML)
|
|
7
9
|
|
|
8
10
|
XMLStyleParser::Parser_precedence.replace( %w(AXML) )
|
|
9
11
|
|
|
10
12
|
|
|
11
|
-
|
|
13
|
+
shared_examples_for "an msrun with basic, non-spectral information" do
|
|
12
14
|
it 'knows the type and version of file' do
|
|
13
15
|
@run.filetype.should == @info.filetype
|
|
14
16
|
@run.version.should == @info.version
|
|
@@ -37,12 +39,11 @@ describe "an msrun with basic, non-spectral information", :shared => true do
|
|
|
37
39
|
it 'has correct first two scans and last scan' do
|
|
38
40
|
[0,1,-1].each do |i|
|
|
39
41
|
@info.scans[i].each do |k,v|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
#end
|
|
42
|
+
testing = @run.scans[i].send(k)
|
|
43
|
+
if k == :precursor
|
|
44
|
+
testing.mz.should be_close(v.mz, 0.000001)
|
|
45
|
+
if testing.intensity # intensity not guaranteed to exist!
|
|
46
|
+
testing.intensity.should == v.intensity
|
|
46
47
|
end
|
|
47
48
|
else
|
|
48
49
|
@run.scans[i].send(k).should == v
|
|
@@ -52,7 +53,7 @@ describe "an msrun with basic, non-spectral information", :shared => true do
|
|
|
52
53
|
end
|
|
53
54
|
end
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
shared_examples_for "an msrun with spectrum" do
|
|
56
57
|
|
|
57
58
|
it 'has all scans with spectrum data' do
|
|
58
59
|
@run.scans.size.should == @info.scan_count
|
|
@@ -68,26 +69,27 @@ describe "an msrun with spectrum", :shared => true do
|
|
|
68
69
|
@run.start_and_end_mz(2).should == @info.start_and_end_mz2
|
|
69
70
|
end
|
|
70
71
|
|
|
72
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# some xml formats have precursor intensities built in, some do not
|
|
76
|
+
shared_examples_for "an msrun with precursor intensities" do
|
|
77
|
+
|
|
71
78
|
it "has correct prec inten for first two scans and last scan" do
|
|
72
79
|
[0,1,-1].each do |i|
|
|
73
80
|
if i == 0
|
|
74
|
-
|
|
75
|
-
#@run.scans[i].precursors.should == []
|
|
76
|
-
#@run.scans[i].precursors.should be_nil
|
|
81
|
+
@run.scans[i].precursor.should be_nil
|
|
77
82
|
next
|
|
78
83
|
end
|
|
79
|
-
expected = @info.scans[i][:
|
|
80
|
-
@run.scans[i].
|
|
81
|
-
|
|
82
|
-
act.intensity.should == exp.intensity
|
|
83
|
-
end
|
|
84
|
+
expected = @info.scans[i][:precursor]
|
|
85
|
+
@run.scans[i].precursor.mz.should be_close(expected.mz, 0.000001)
|
|
86
|
+
@run.scans[i].precursor.intensity.should == expected.intensity
|
|
84
87
|
end
|
|
85
88
|
end
|
|
86
89
|
|
|
87
|
-
it_should_behave_like "an msrun with basic, non-spectral information"
|
|
88
90
|
end
|
|
89
91
|
|
|
90
|
-
|
|
92
|
+
shared_examples_for 'a basic scan info generator' do
|
|
91
93
|
|
|
92
94
|
def check_table(table, answer)
|
|
93
95
|
answer.each do |k,v|
|
|
@@ -143,13 +145,13 @@ MzXML_version_1_info = MyOpenStruct.new do |info|
|
|
|
143
145
|
:num => 2,
|
|
144
146
|
:ms_level => 2,
|
|
145
147
|
:time => 1.90,
|
|
146
|
-
:
|
|
148
|
+
:precursor => MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)
|
|
147
149
|
}
|
|
148
150
|
info.scans[-1] = {
|
|
149
151
|
:num => 3748,
|
|
150
152
|
:ms_level => 2,
|
|
151
153
|
:time => 5102.55,
|
|
152
|
-
:
|
|
154
|
+
:precursor => MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)
|
|
153
155
|
}
|
|
154
156
|
info.scan_count0 = info.scan_count
|
|
155
157
|
info.scan_count1 = 937
|
|
@@ -163,7 +165,7 @@ describe MS::MSRun, "on mzXML version 1 files (w/o spectra)" do
|
|
|
163
165
|
before(:all) do
|
|
164
166
|
@info = MzXML_version_1_info
|
|
165
167
|
start = Time.now
|
|
166
|
-
@run = @info.klass.new(@info.file, :
|
|
168
|
+
@run = @info.klass.new(@info.file, :lazy => :no_spectra)
|
|
167
169
|
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
|
168
170
|
end
|
|
169
171
|
it_should_behave_like "an msrun with basic, non-spectral information"
|
|
@@ -176,34 +178,35 @@ describe MS::MSRun, "on mzXML version 1 files (w/spectra)" do
|
|
|
176
178
|
before(:all) do
|
|
177
179
|
@info = MzXML_version_1_info
|
|
178
180
|
start = Time.now
|
|
179
|
-
@run = @info.klass.new(@info.file)
|
|
181
|
+
@run = @info.klass.new(@info.file, :lazy => :not)
|
|
180
182
|
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
|
181
183
|
end
|
|
182
184
|
|
|
183
185
|
it_should_behave_like "an msrun with spectrum"
|
|
186
|
+
it_should_behave_like "an msrun with precursor intensities"
|
|
184
187
|
it_should_behave_like 'a basic scan info generator'
|
|
185
188
|
end
|
|
186
189
|
end
|
|
187
190
|
|
|
188
191
|
MzXML_version_20_info = MyOpenStruct.new do |info|
|
|
189
|
-
info.file =
|
|
192
|
+
info.file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
|
190
193
|
info.klass = MS::MSRun
|
|
191
194
|
info.filetype = :mzxml
|
|
192
195
|
info.version = '2.0'
|
|
193
|
-
info.scan_count =
|
|
196
|
+
info.scan_count = 20
|
|
194
197
|
#info.scan_counts = ??
|
|
195
198
|
info.start_time = 0.13
|
|
196
|
-
info.end_time =
|
|
199
|
+
info.end_time = 27.31
|
|
197
200
|
info.num_to_prec_mz_hash = {
|
|
198
201
|
0 => nil,
|
|
199
202
|
1 => nil,
|
|
200
203
|
2 => 390.9291992,
|
|
201
204
|
3 => 1121.944824,
|
|
202
205
|
4 => 1321.913574,
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
206
|
+
17 => nil,
|
|
207
|
+
18 => 308.795959,
|
|
208
|
+
19 => 444.983337,
|
|
209
|
+
20 => 361.671875,
|
|
207
210
|
}
|
|
208
211
|
info.scans = {}
|
|
209
212
|
info.scans[0]= {
|
|
@@ -215,61 +218,80 @@ MzXML_version_20_info = MyOpenStruct.new do |info|
|
|
|
215
218
|
:num => 2,
|
|
216
219
|
:ms_level => 2,
|
|
217
220
|
:time => 1.49,
|
|
218
|
-
:
|
|
221
|
+
:precursor => MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)
|
|
219
222
|
}
|
|
220
223
|
info.scans[-1] = {
|
|
221
|
-
:num =>
|
|
224
|
+
:num => 20,
|
|
222
225
|
:ms_level => 2,
|
|
223
|
-
:time =>
|
|
224
|
-
:
|
|
226
|
+
:time => 27.31,
|
|
227
|
+
:precursor => MS::Precursor.new(:mz => 361.671875, :intensity => 572148.0)
|
|
225
228
|
}
|
|
226
229
|
info.scan_count0 = info.scan_count
|
|
227
|
-
info.scan_count1 =
|
|
228
|
-
info.scan_count2 =
|
|
229
|
-
info.start_and_end_mz1 = [300.0, 1500
|
|
230
|
+
info.scan_count1 = 5
|
|
231
|
+
info.scan_count2 = 15
|
|
232
|
+
info.start_and_end_mz1 = [300.0, 1499] # apparently nothing brushes right up to 1500
|
|
230
233
|
# that first number on start_and_end_mz2 is arbitrary as to accuracy...
|
|
231
234
|
# I'm not sure of the correct answer
|
|
232
|
-
info.start_and_end_mz2 = [110.0,
|
|
235
|
+
info.start_and_end_mz2 = [110.0, 1955] ## again, this is based on data
|
|
233
236
|
end
|
|
234
237
|
|
|
235
238
|
describe MS::MSRun, "on mzXML version 2.0 files (w/o spectra)" do
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
end
|
|
239
|
+
before(:all) do
|
|
240
|
+
@info = MzXML_version_20_info
|
|
241
|
+
start = Time.now
|
|
242
|
+
@run = @info.klass.new(@info.file, :lazy => :no_spectra)
|
|
243
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
|
244
|
+
end
|
|
243
245
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
+
#it_should_behave_like "an msrun with basic, non-spectral information"
|
|
247
|
+
it_should_behave_like 'a basic scan info generator'
|
|
248
|
+
|
|
249
|
+
it 'fixes bad scan tags on the fly!' do
|
|
250
|
+
# if this test works, this is true
|
|
246
251
|
end
|
|
247
252
|
end
|
|
248
253
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
254
|
+
shared_examples_for "an mzXML version 2.0 file (w/spectra)" do
|
|
255
|
+
before(:all) do
|
|
256
|
+
@info = MzXML_version_20_info
|
|
257
|
+
# first fix the file of bad scan tags
|
|
258
|
+
@info.file
|
|
259
|
+
#start = Time.now
|
|
260
|
+
@fh = File.open(@info.file)
|
|
261
|
+
@run = @info.klass.new(@fh, :lazy => @lazy_type)
|
|
262
|
+
#puts "- read #{File.basename(@new_file)} in #{Time.now - start} seconds" if $specdoc
|
|
263
|
+
end
|
|
257
264
|
|
|
258
|
-
|
|
259
|
-
|
|
265
|
+
after(:all) do
|
|
266
|
+
# @fh.close # not sure why, but the filehandle is already closed!
|
|
267
|
+
# Maybe because the filehandle went out of scope??
|
|
260
268
|
end
|
|
269
|
+
|
|
270
|
+
it_should_behave_like "an msrun with spectrum" # <- trouble
|
|
271
|
+
it_should_behave_like 'a basic scan info generator'
|
|
261
272
|
end
|
|
262
273
|
|
|
274
|
+
#[:io, :string, :not].each do |lazy_type|
|
|
275
|
+
[:io, :string, :not].each do |lazy_type|
|
|
276
|
+
describe MS::MSRun, "mzXML v2.0 with :lazy => :#{lazy_type}" do
|
|
277
|
+
before(:all) { @lazy_type = lazy_type}
|
|
278
|
+
it_should_behave_like "an mzXML version 2.0 file (w/spectra)"
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
end
|
|
282
|
+
|
|
263
283
|
Mzdata_105_info = MyOpenStruct.new do |info|
|
|
264
|
-
info.file =
|
|
284
|
+
info.file = Tfiles + '/opd1_2runs_2mods/data/020.mzData.xml'
|
|
265
285
|
info.klass = MS::MSRun
|
|
266
286
|
info.filetype = :mzdata
|
|
267
287
|
info.version = '1.05'
|
|
268
|
-
|
|
288
|
+
# NOTE that the real file drops the last scan!! giving a mismatch
|
|
289
|
+
info.scan_count = 20
|
|
269
290
|
info.start_time = 0.13002 # minutes == 0.00216667
|
|
270
291
|
# This is the correct one!, but Thermo drops last scan
|
|
271
292
|
# info.end_time = 5099.688 #84.9948
|
|
272
|
-
info.end_time = 84.968500*60 # 5098.11
|
|
293
|
+
#info.end_time = 84.968500*60 # 5098.11
|
|
294
|
+
info.end_time = 0.455167 * 60
|
|
273
295
|
|
|
274
296
|
info.num_to_prec_mz_hash = {
|
|
275
297
|
0 => nil,
|
|
@@ -277,9 +299,10 @@ Mzdata_105_info = MyOpenStruct.new do |info|
|
|
|
277
299
|
2 => 390.9291992,
|
|
278
300
|
3 => 1121.944824,
|
|
279
301
|
4 => 1321.913574,
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
302
|
+
17 => nil,
|
|
303
|
+
18 => 308.795959,
|
|
304
|
+
19 => 444.983337,
|
|
305
|
+
20 => 361.671875,
|
|
283
306
|
# 3620 => 357.0411987, Bioworks 3.3 is broken
|
|
284
307
|
}
|
|
285
308
|
|
|
@@ -293,105 +316,124 @@ Mzdata_105_info = MyOpenStruct.new do |info|
|
|
|
293
316
|
:num => 2,
|
|
294
317
|
:ms_level => 2,
|
|
295
318
|
:time => 0.024833 * 60, # 1.48998
|
|
296
|
-
:
|
|
319
|
+
:precursor => MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006),
|
|
297
320
|
}
|
|
298
321
|
info.scans[-1] = {
|
|
299
|
-
:num =>
|
|
322
|
+
:num => 20,
|
|
300
323
|
:ms_level => 2,
|
|
301
324
|
#:time => 5099.69,
|
|
302
|
-
|
|
325
|
+
#:time => 84.968500 * 60, # 5098.11
|
|
326
|
+
:time => 0.455167 * 60,
|
|
303
327
|
|
|
304
|
-
:
|
|
328
|
+
:precursor => MS::Precursor.new( :mz => 361.671875, :intensity => 572148.0 ) # wrong
|
|
305
329
|
}
|
|
306
330
|
info.scan_count0 = info.scan_count
|
|
307
|
-
info.scan_count1 =
|
|
308
|
-
info.scan_count2 =
|
|
331
|
+
info.scan_count1 = 5
|
|
332
|
+
info.scan_count2 = 15 # should be 2715, they dropped the last scan!
|
|
309
333
|
info.start_and_end_mz1 = [300.0, 1500.0]
|
|
310
334
|
# This is the Correct one!!!, but Thermo drops last scan
|
|
311
335
|
#info.start_and_end_mz2 = [112.0, 2000.0]
|
|
312
|
-
info.start_and_end_mz2 = [95.0,
|
|
336
|
+
info.start_and_end_mz2 = [95.0, 1955]
|
|
313
337
|
end
|
|
314
338
|
|
|
315
339
|
describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/o spectra)" do
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
end
|
|
340
|
+
before(:all) do
|
|
341
|
+
@info = Mzdata_105_info
|
|
342
|
+
start = Time.now
|
|
343
|
+
@run = @info.klass.new(@info.file, :lazy => :no_spectra)
|
|
344
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
|
345
|
+
puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
|
|
346
|
+
end
|
|
324
347
|
|
|
325
|
-
|
|
326
|
-
|
|
348
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
|
349
|
+
it_should_behave_like 'a basic scan info generator'
|
|
327
350
|
|
|
328
|
-
end
|
|
329
351
|
end
|
|
330
352
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
353
|
+
shared_examples_for "mzData v1.05 (Bioworks3.3) (w/spectra)" do
|
|
354
|
+
before(:all) do
|
|
355
|
+
@info = Mzdata_105_info
|
|
356
|
+
#start = Time.now
|
|
357
|
+
@fh = File.open(@info.file)
|
|
358
|
+
@run = @info.klass.new(@fh, :lazy => @lazy_type)
|
|
359
|
+
#puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
|
360
|
+
puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
|
|
361
|
+
end
|
|
340
362
|
|
|
341
|
-
|
|
342
|
-
|
|
363
|
+
it_should_behave_like "an msrun with spectrum"
|
|
364
|
+
it_should_behave_like 'a basic scan info generator'
|
|
343
365
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
end
|
|
366
|
+
it 'has (or can get) correct precursor intensities for all scans' do
|
|
367
|
+
check_file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
|
368
|
+
prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
|
|
369
|
+
if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
|
|
370
|
+
[$1.to_f, $2.to_f]
|
|
371
|
+
else
|
|
372
|
+
abort "didn't match for some crazy reason! (probably newline issues)"
|
|
352
373
|
end
|
|
374
|
+
end
|
|
353
375
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
376
|
+
prec_mz_cnt = 0
|
|
377
|
+
@run.scans.each_with_index do |scan,i|
|
|
378
|
+
next if i % 4 == 0
|
|
379
|
+
(exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
|
|
358
380
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
381
|
+
precursor = scan.precursor
|
|
382
|
+
precursor.mz.should be_close(exp_mz, 0.00001)
|
|
383
|
+
prec_inten =
|
|
384
|
+
if precursor.intensity.nil?
|
|
385
|
+
precursor.parent.spectrum.intensity_at_mz(precursor.mz)
|
|
386
|
+
else
|
|
387
|
+
precursor.intensity
|
|
388
|
+
end
|
|
389
|
+
prec_inten.should be_close(exp_int, 51)
|
|
362
390
|
|
|
363
|
-
|
|
364
|
-
|
|
391
|
+
prec_mz_cnt += 1
|
|
392
|
+
end
|
|
365
393
|
end
|
|
366
394
|
end
|
|
395
|
+
|
|
396
|
+
#[:string, :not, :io].each do |lazy_type|
|
|
397
|
+
[:io].each do |lazy_type|
|
|
398
|
+
describe MS::MSRun, "mzData v1.05 with :lazy => :#{lazy_type}" do
|
|
399
|
+
before(:all) { @lazy_type = lazy_type}
|
|
400
|
+
it_should_behave_like "mzData v1.05 (Bioworks3.3) (w/spectra)"
|
|
401
|
+
end
|
|
367
402
|
end
|
|
368
403
|
|
|
369
|
-
describe
|
|
370
|
-
before(:
|
|
404
|
+
describe 'reading a small file of twenty scans' do
|
|
405
|
+
before(:all) do
|
|
371
406
|
@file = Tfiles + "/opd1/twenty_scans.mzXML"
|
|
372
|
-
@msrun = MS::MSRun.new(@file)
|
|
373
407
|
end
|
|
374
408
|
|
|
375
|
-
it 'retrieves times and spectra' do
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
409
|
+
it 'retrieves times and spectra with all lazy types' do
|
|
410
|
+
[:not, :string, :io].each do |lazy_type|
|
|
411
|
+
|
|
412
|
+
File.open(@file) do |io|
|
|
413
|
+
msrun = MS::MSRun.new(io, :lazy => lazy_type)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
(times, spectra) = msrun.times_and_spectra(1)
|
|
417
|
+
etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
|
|
418
|
+
num_peaks = [992, 814, 796, 849, 813]
|
|
419
|
+
tol = 0.000000001
|
|
420
|
+
spectra[0].mzs[1].should be_close(301.430114746094, tol)
|
|
421
|
+
spectra[0].intensities[1].should be_close(22192.0, tol)
|
|
422
|
+
spectra[0].mzs[-1].should be_close(1499.09912109375, tol)
|
|
423
|
+
spectra[0].intensities[-1].should be_close(111286.0, tol)
|
|
424
|
+
|
|
425
|
+
spectra[-1].mzs[1].should be_close(301.243774414062, tol)
|
|
426
|
+
spectra[-1].intensities[1].should be_close(77503.0, tol)
|
|
427
|
+
spectra[-1].mzs[-1].should be_close(1499.42016601562, tol)
|
|
428
|
+
spectra[-1].intensities[-1].should be_close(13.0, tol)
|
|
429
|
+
|
|
430
|
+
num_peaks.each_with_index do |n,i|
|
|
431
|
+
spectra[i].mzs.size.should == n
|
|
432
|
+
end
|
|
433
|
+
etimes.each_with_index do |t,i|
|
|
434
|
+
times[i].should be_close(t, 0.00001)
|
|
435
|
+
end
|
|
436
|
+
end
|
|
395
437
|
end
|
|
396
438
|
end
|
|
397
439
|
end
|
|
@@ -412,9 +454,10 @@ describe MS::MSRun, 'with a small set of scans' do
|
|
|
412
454
|
precs = (0..(vals.size)).to_a.map do |x|
|
|
413
455
|
MS::Precursor.new([x,100])
|
|
414
456
|
end
|
|
457
|
+
|
|
415
458
|
scans = vals.zip(precs).map do |ar,prec|
|
|
416
459
|
scan = MS::Scan.new(ar)
|
|
417
|
-
scan.
|
|
460
|
+
scan.precursor = prec
|
|
418
461
|
scan
|
|
419
462
|
end
|
|
420
463
|
scans.size.should == vals.size
|
|
@@ -422,7 +465,7 @@ describe MS::MSRun, 'with a small set of scans' do
|
|
|
422
465
|
parents = [nil,s[0],s[0],s[2],s[2],nil,s[5],s[6],s[5]]
|
|
423
466
|
MS::MSRun.add_parent_scan(scans)
|
|
424
467
|
scans.each_with_index do |scan,i|
|
|
425
|
-
scan.
|
|
468
|
+
scan.precursor.parent.should == parents[i]
|
|
426
469
|
end
|
|
427
470
|
end
|
|
428
471
|
end
|
data/specs/ms/parser_spec.rb
CHANGED
|
@@ -22,7 +22,7 @@ describe "a MS::Parser on a file", :shared => true do
|
|
|
22
22
|
|
|
23
23
|
########################################################################
|
|
24
24
|
# NOTE: methods to verify parsing of information should be defined where
|
|
25
|
-
# that information is
|
|
25
|
+
# that information is required.
|
|
26
26
|
# e.g. msrun_spec.rb will verify that msrun objects are created properly.
|
|
27
27
|
# this is because we don't care how we get that file, just that we get it.
|
|
28
28
|
# The whole process of parsing a file should be transparent to users.
|
|
@@ -71,7 +71,7 @@ describe MS::Parser, "on an mzXML version 2 file" do
|
|
|
71
71
|
@version = '2.0'
|
|
72
72
|
@filetype_version = [@filetype, @version]
|
|
73
73
|
@subclass = 'MS::Parser::MzXML'
|
|
74
|
-
@file =
|
|
74
|
+
@file = Tfiles + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
|
75
75
|
end
|
|
76
76
|
it_should_behave_like "a MS::Parser on a file"
|
|
77
77
|
end
|
|
@@ -84,7 +84,7 @@ describe MS::Parser, "on an mzData version 1.05 file" do
|
|
|
84
84
|
@version = '1.05'
|
|
85
85
|
@filetype_version = [@filetype, @version]
|
|
86
86
|
@subclass = 'MS::Parser::MzData'
|
|
87
|
-
@file =
|
|
87
|
+
@file = Tfiles + '/opd1_2runs_2mods/data/020.mzData.xml'
|
|
88
88
|
end
|
|
89
89
|
it_should_behave_like "a MS::Parser on a file"
|
|
90
90
|
end
|
data/specs/ms/spectrum_spec.rb
CHANGED
|
@@ -146,7 +146,7 @@ describe 'filtering on a real srf file' do
|
|
|
146
146
|
spec_large do
|
|
147
147
|
it 'does tmm with a toppred file on srf' do
|
|
148
148
|
opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar => false}}
|
|
149
|
-
dir = Tfiles_l + '/opd1_2runs_2mods/
|
|
149
|
+
dir = Tfiles_l + '/opd1_2runs_2mods/sequest33'
|
|
150
150
|
tmm_file = dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
|
|
151
151
|
fasta_file = dir + '/ecoli_K12_ncbi_20060321.fasta'
|
|
152
152
|
sequest_file = dir + '/ecoli.params'
|
|
@@ -219,6 +219,9 @@ describe SpecID::Precision::Filter::Peps do
|
|
|
219
219
|
[1.2, 1.2, 1.2, 0.1, 50, true] => 6, # "all passing"
|
|
220
220
|
[1.2, 1.2, 1.2, 0.2, 50, true] => 1, # "high deltacn"
|
|
221
221
|
[1.0, 1.0, 1.6, 0.1, 50, true] => 5, # "one xcorr too high"
|
|
222
|
+
##
|
|
223
|
+
[1.0, 1.0, 1.0, 0.05, 60, true] => 6, ## testing ppm filtering:
|
|
224
|
+
[1.0, 1.0, 1.0, 0.05, 10, true] => 0,
|
|
222
225
|
}
|
|
223
226
|
args_and_expected.each do |args,exp|
|
|
224
227
|
filt = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *args)
|
|
@@ -28,14 +28,14 @@ describe 'finding precision Proph::Prot::Pep objects' do
|
|
|
28
28
|
|
|
29
29
|
it 'runs without any validator' do
|
|
30
30
|
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
|
31
|
-
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits_precision", "probabilities"]
|
|
31
|
+
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "params", "pephits", "pephits_precision", "probabilities"]
|
|
32
32
|
answer[:aaseqs].should == %w(3 0 4 1 2)
|
|
33
33
|
end
|
|
34
34
|
|
|
35
35
|
it 'returns modified peptides if any modified peptides' do
|
|
36
36
|
@spec_id.peps[1].mod_info = Sequest::PepXML::SearchHit::ModificationInfo.new(['MODIFIED', []])
|
|
37
37
|
answer = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id)
|
|
38
|
-
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits_precision", "probabilities"]
|
|
38
|
+
answer.keys.map {|v| v.to_s }.sort.should == ["aaseqs", "charges", "count", "modified_peptides", "params", "pephits", "pephits_precision", "probabilities"]
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
end
|
|
@@ -309,7 +309,7 @@ describe 'bioworks file with modifications transformed into pepxml' do
|
|
|
309
309
|
|
|
310
310
|
spec_large do
|
|
311
311
|
before(:all) do
|
|
312
|
-
modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/
|
|
312
|
+
modfiles_sequest_dir = Tfiles_l + '/opd1_2runs_2mods/sequest33/'
|
|
313
313
|
modfiles_data_dir = Tfiles_l + '/opd1_2runs_2mods/data/'
|
|
314
314
|
@srgfile = modfiles_sequest_dir + 'tmp.srg'
|
|
315
315
|
@out_path = modfiles_sequest_dir + 'pepxml'
|
data/specs/spec_id/sqt_spec.rb
CHANGED
|
@@ -110,7 +110,7 @@ describe 'converting a large srf to sqt' do
|
|
|
110
110
|
del(@output)
|
|
111
111
|
end
|
|
112
112
|
it 'can get db info with correct path' do
|
|
113
|
-
@srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/
|
|
113
|
+
@srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33')
|
|
114
114
|
@output.exist_as_a_file?.should be_true
|
|
115
115
|
lines = IO.readlines(@output)
|
|
116
116
|
has_md5 = lines.any? do |line|
|
|
@@ -127,8 +127,8 @@ describe 'converting a large srf to sqt' do
|
|
|
127
127
|
del(@output)
|
|
128
128
|
end
|
|
129
129
|
it 'can update the Database' do
|
|
130
|
-
@srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/
|
|
131
|
-
regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/
|
|
130
|
+
@srf.to_sqt(@output, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest33', :update_db_path => true)
|
|
131
|
+
regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
|
|
132
132
|
updated_db = IO.readlines(@output).any? do |line|
|
|
133
133
|
line =~ regexp
|
|
134
134
|
end
|
|
@@ -217,8 +217,8 @@ end
|
|
|
217
217
|
describe SQTGroup, ': acting as a SpecID on large files' do
|
|
218
218
|
spec_large do
|
|
219
219
|
before(:each) do
|
|
220
|
-
file1 = Tfiles_l + '/opd1_2runs_2mods/
|
|
221
|
-
file2 = Tfiles_l + '/opd1_2runs_2mods/
|
|
220
|
+
file1 = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.sqt'
|
|
221
|
+
file2 = Tfiles_l + '/opd1_2runs_2mods/sequest33/040.sqt'
|
|
222
222
|
file1.exist_as_a_file?.should be_true
|
|
223
223
|
file2.exist_as_a_file?.should be_true
|
|
224
224
|
@sqg = SQTGroup.new([file1, file2])
|