mspire 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/specs/gi_spec.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/spec_helper'
|
2
|
)
|
3
|
+
require 'gi'
|
4
|
+
|
5
|
+
|
6
|
+
describe GI, "given a 'GI' number" do
|
7
|
+
before(:all) do
|
8
|
+
@gi_num = 836805
|
9
|
+
end
|
10
|
+
it 'can query NCBI for annotation (fails nicely w/o connection)' do
|
11
|
+
annot = GI.gi2annot([@gi_num])
|
12
|
+
if annot
|
13
|
+
annot.first.should == 'proteosome component PRE4 [Saccharomyces cerevisiae]'
|
14
|
+
else
|
15
|
+
puts "- retrieval of gi failed gracefully w/o internet connection"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
2
|
+
require 'merge_deep'
|
3
|
+
|
4
|
+
describe 'merging one level deep' do
|
5
|
+
it 'works' do
|
6
|
+
base = {1=>"X", 3=>{6=>7, 8=>9}}
|
7
|
+
another = {1=>'y', 3=>{6=>9}}
|
8
|
+
ans = base.merge_deep(another, 1)
|
9
|
+
ans.should == {1=>'y', 3=>{6=>9, 8=>9}}
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
+
require 'ms/gradient_program'
|
3
|
+
|
4
|
+
describe GradientProgram do
|
5
|
+
it 'can be set from a Thermo Xcal 2.X .meth file' do
|
6
|
+
data = [
|
7
|
+
[0.00, 95.0, 5.0, 0.0, 0.0, 38.0],
|
8
|
+
[1.00, 90.0, 10.0, 0.0, 0.0, 38.0],
|
9
|
+
[30.00, 85.0, 15.0, 0.0, 0.0, 38.0],
|
10
|
+
[40.00, 80.0, 20.0, 0.0, 0.0, 38.0],
|
11
|
+
[45.00, 78.0, 22.0, 0.0, 0.0, 38.0],
|
12
|
+
[50.00, 72.0, 28.0, 0.0, 0.0, 38.0],
|
13
|
+
[65.00, 60.0, 40.0, 0.0, 0.0, 38.0],
|
14
|
+
[72.00, 10.0, 90.0, 0.0, 0.0, 38.0],
|
15
|
+
[75.0, 10.0, 90.0, 0.0, 0.0, 38.0],
|
16
|
+
[81.00, 10.0, 90.0, 0.0, 0.0, 38.0],
|
17
|
+
[81.10, 95.0, 5.0, 0.0, 0.0, 38.0],
|
18
|
+
[90.00, 95.0, 5.0, 0.0, 0.0, 38.0],
|
19
|
+
]
|
20
|
+
|
21
|
+
ms_pump_expected_tps = data.map do |ar|
|
22
|
+
GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
|
23
|
+
end
|
24
|
+
ms_pump_expected = GradientProgram.new('MS Pump', ms_pump_expected_tps, %w(A B C D))
|
25
|
+
|
26
|
+
data = [
|
27
|
+
[0.00, 0.0, 0.0, 100.0, 0.0, 40.0],
|
28
|
+
[90.0, 0.0, 0.0, 100.0, 0.0, 40.0],
|
29
|
+
]
|
30
|
+
sample_pump_expected_tps = data.map {|ar| GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4]) }
|
31
|
+
sample_pump_expected = GradientProgram.new('Sample Pump', sample_pump_expected_tps, %w(A B C D))
|
32
|
+
|
33
|
+
file = Tfiles + '/s01_anC1_ld020mM.meth'
|
34
|
+
File.open(file) do |fh|
|
35
|
+
gps = GradientProgram.all_from_handle(fh)
|
36
|
+
gps[0].should == ms_pump_expected
|
37
|
+
gps[1].should == sample_pump_expected
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'can be set from a Thermo Xcal 1.X .RAW file (but missing pump_type)' do
|
42
|
+
file = Tfiles + '/opd1_020_beginning.RAW'
|
43
|
+
data = [[0.0, 0.0, 0.0, 100.0, 0.0, 200.0],
|
44
|
+
[1.0, 0.0, 0.0, 96.0, 4.0, 200.0],
|
45
|
+
[10.0, 0.0, 0.0, 96.0, 4.0, 200.0],
|
46
|
+
[11.0, 0.0, 0.0, 100.0, 0.0, 200.0],
|
47
|
+
[85.0, 0.0, 0.0, 100.0, 0.0, 200.0],]
|
48
|
+
|
49
|
+
time_points = data.map do |ar|
|
50
|
+
GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
|
51
|
+
end
|
52
|
+
pump_type = '' ## need to get pump type...
|
53
|
+
ms_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
|
54
|
+
|
55
|
+
data = [[0.0, 95.0, 5.0, 0.0, 0.0, 200.0],
|
56
|
+
[1.0, 95.0, 5.0, 0.0, 0.0, 200.0],
|
57
|
+
[61.0, 55.0, 45.0, 0.0, 0.0, 200.0],
|
58
|
+
[62.0, 5.0, 95.0, 0.0, 0.0, 200.0],
|
59
|
+
[67.0, 5.0, 95.0, 0.0, 0.0, 200.0],
|
60
|
+
[68.0, 95.0, 5.0, 0.0, 0.0, 200.0],
|
61
|
+
[85.0, 95.0, 5.0, 0.0, 0.0, 200.0],]
|
62
|
+
time_points = data.map do |ar|
|
63
|
+
GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
|
64
|
+
end
|
65
|
+
pump_type = '' ## need to get pump type...
|
66
|
+
sample_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
|
67
|
+
|
68
|
+
# we'd like to get an older .meth file to do this on
|
69
|
+
File.open(file) do |fh|
|
70
|
+
gps = GradientProgram.all_from_handle(fh)
|
71
|
+
gps[0].should == ms_pump_expected
|
72
|
+
gps[1].should == sample_pump_expected
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
@@ -0,0 +1,455 @@
|
|
1
|
+
|
2
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
3
|
+
require 'ms/msrun'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
parsers = %w(AXML LibXML XMLParser Regexp REXML)
|
7
|
+
|
8
|
+
XMLStyleParser::Parser_precedence.replace( %w(AXML) )
|
9
|
+
|
10
|
+
|
11
|
+
describe "an msrun with basic, non-spectral information", :shared => true do
|
12
|
+
it 'knows the type and version of file' do
|
13
|
+
@run.filetype.should == @info.filetype
|
14
|
+
@run.version.should == @info.version
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'knows basic run information' do
|
18
|
+
@run.scan_count.should == @info.scan_count
|
19
|
+
@run.start_time.should == @info.start_time
|
20
|
+
@run.end_time.should == @info.end_time
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has all scans' do
|
24
|
+
@run.scans.size.should == @info.scan_count
|
25
|
+
@run.scans.each_with_index do |sc,i|
|
26
|
+
sc.class.should == MS::Scan
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'can determine scan counts for any mslevel' do
|
31
|
+
@run.scan_counts.class.should == Array
|
32
|
+
@run.scan_count(0).should == @info.scan_count0
|
33
|
+
@run.scan_count(1).should == @info.scan_count1
|
34
|
+
@run.scan_count(2).should == @info.scan_count2
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'has correct first two scans and last scan' do
|
38
|
+
[0,1,-1].each do |i|
|
39
|
+
@info.scans[i].each do |k,v|
|
40
|
+
if k == :precursors
|
41
|
+
v.zip( @run.scans[i].send(k) ) do |exp, act|
|
42
|
+
act.mz.should be_close(exp.mz, 0.000001)
|
43
|
+
#if act.intensity # intensity not guaranteed to exist!
|
44
|
+
# act.intensity.should == exp.intensity
|
45
|
+
#end
|
46
|
+
end
|
47
|
+
else
|
48
|
+
@run.scans[i].send(k).should == v
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
describe "an msrun with spectrum", :shared => true do
|
56
|
+
|
57
|
+
it 'has all scans with spectrum data' do
|
58
|
+
@run.scans.size.should == @info.scan_count
|
59
|
+
@run.scans.each_with_index do |sc,i|
|
60
|
+
sc.class.should == MS::Scan
|
61
|
+
sc.spectrum.should have_mz_data
|
62
|
+
sc.spectrum.should have_intensity_data
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'can determine start_and_end_mz' do
|
67
|
+
@run.start_and_end_mz(1).should == @info.start_and_end_mz1
|
68
|
+
@run.start_and_end_mz(2).should == @info.start_and_end_mz2
|
69
|
+
end
|
70
|
+
|
71
|
+
it "has correct prec inten for first two scans and last scan" do
|
72
|
+
[0,1,-1].each do |i|
|
73
|
+
if i == 0
|
74
|
+
# currently we do different things for ms_level 1 scans! is it nil or []?
|
75
|
+
#@run.scans[i].precursors.should == []
|
76
|
+
#@run.scans[i].precursors.should be_nil
|
77
|
+
next
|
78
|
+
end
|
79
|
+
expected = @info.scans[i][:precursors]
|
80
|
+
@run.scans[i].precursors.zip(expected) do |act,exp|
|
81
|
+
act.mz.should be_close(exp.mz, 0.000001)
|
82
|
+
act.intensity.should == exp.intensity
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
88
|
+
end
|
89
|
+
|
90
|
+
describe 'a basic scan info generator', :shared => true do
|
91
|
+
|
92
|
+
def check_table(table, answer)
|
93
|
+
answer.each do |k,v|
|
94
|
+
if v == nil
|
95
|
+
table[k].should be_nil
|
96
|
+
else
|
97
|
+
table[k].should be_close(v, 0.000001)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'generates precursor_mz_by_scan_num lookup table' do
|
103
|
+
ar = @run.precursor_mz_by_scan_num
|
104
|
+
check_table(ar, @info.num_to_prec_mz_hash)
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'class method -> precursor_mz_by_scan_num (with file)' do
|
108
|
+
ar = @info.klass.precursor_mz_by_scan_num(@info.file)
|
109
|
+
check_table(ar, @info.num_to_prec_mz_hash)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
MzXML_version_1_info = MyOpenStruct.new do |info|
|
114
|
+
info.file = Tfiles_l + '/yeast_gly_mzXML/000.mzXML'
|
115
|
+
info.klass = MS::MSRun
|
116
|
+
info.filetype = :mzxml
|
117
|
+
info.version = '1.0'
|
118
|
+
info.scan_count = 3748
|
119
|
+
#info.scan_counts = [3748, 937, nil] ## need to get ms2
|
120
|
+
info.start_time = 0.44
|
121
|
+
info.end_time = 5102.55
|
122
|
+
info.num_to_prec_mz_hash = {
|
123
|
+
0 => nil,
|
124
|
+
1 => nil,
|
125
|
+
2 => 391.045410,
|
126
|
+
3 => 446.009033,
|
127
|
+
4 => 1222.033203,
|
128
|
+
5 => nil,
|
129
|
+
6 => 390.947449,
|
130
|
+
3744 => 338.779114,
|
131
|
+
3745 => nil,
|
132
|
+
3746 => 304.136597,
|
133
|
+
3748 => 433.564941,
|
134
|
+
}
|
135
|
+
info.scans = {}
|
136
|
+
|
137
|
+
info.scans[0] = {
|
138
|
+
:num => 1,
|
139
|
+
:ms_level => 1,
|
140
|
+
:time => 0.440,
|
141
|
+
}
|
142
|
+
info.scans[1] = {
|
143
|
+
:num => 2,
|
144
|
+
:ms_level => 2,
|
145
|
+
:time => 1.90,
|
146
|
+
:precursors => [MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)]
|
147
|
+
}
|
148
|
+
info.scans[-1] = {
|
149
|
+
:num => 3748,
|
150
|
+
:ms_level => 2,
|
151
|
+
:time => 5102.55,
|
152
|
+
:precursors => [MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)]
|
153
|
+
}
|
154
|
+
info.scan_count0 = info.scan_count
|
155
|
+
info.scan_count1 = 937
|
156
|
+
info.scan_count2 = 2811
|
157
|
+
info.start_and_end_mz1 = [300.0, 1500.0]
|
158
|
+
info.start_and_end_mz2 = [0.0, 2000.0]
|
159
|
+
end
|
160
|
+
|
161
|
+
describe MS::MSRun, "on mzXML version 1 files (w/o spectra)" do
|
162
|
+
spec_large do
|
163
|
+
before(:all) do
|
164
|
+
@info = MzXML_version_1_info
|
165
|
+
start = Time.now
|
166
|
+
@run = @info.klass.new(@info.file, :spectra => false)
|
167
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
168
|
+
end
|
169
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
170
|
+
it_should_behave_like 'a basic scan info generator'
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
describe MS::MSRun, "on mzXML version 1 files (w/spectra)" do
|
175
|
+
spec_large do
|
176
|
+
before(:all) do
|
177
|
+
@info = MzXML_version_1_info
|
178
|
+
start = Time.now
|
179
|
+
@run = @info.klass.new(@info.file)
|
180
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
181
|
+
end
|
182
|
+
|
183
|
+
it_should_behave_like "an msrun with spectrum"
|
184
|
+
it_should_behave_like 'a basic scan info generator'
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
MzXML_version_20_info = MyOpenStruct.new do |info|
|
189
|
+
info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
190
|
+
info.klass = MS::MSRun
|
191
|
+
info.filetype = :mzxml
|
192
|
+
info.version = '2.0'
|
193
|
+
info.scan_count = 3620
|
194
|
+
#info.scan_counts = ??
|
195
|
+
info.start_time = 0.13
|
196
|
+
info.end_time = 5099.69
|
197
|
+
info.num_to_prec_mz_hash = {
|
198
|
+
0 => nil,
|
199
|
+
1 => nil,
|
200
|
+
2 => 390.9291992,
|
201
|
+
3 => 1121.944824,
|
202
|
+
4 => 1321.913574,
|
203
|
+
3617 => nil,
|
204
|
+
3618 => 828.2867432,
|
205
|
+
3619 => 424.8538208,
|
206
|
+
3620 => 357.0411987,
|
207
|
+
}
|
208
|
+
info.scans = {}
|
209
|
+
info.scans[0]= {
|
210
|
+
:num => 1,
|
211
|
+
:ms_level => 1,
|
212
|
+
:time => 0.13,
|
213
|
+
}
|
214
|
+
info.scans[1] = {
|
215
|
+
:num => 2,
|
216
|
+
:ms_level => 2,
|
217
|
+
:time => 1.49,
|
218
|
+
:precursors => [MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)]
|
219
|
+
}
|
220
|
+
info.scans[-1] = {
|
221
|
+
:num => 3620,
|
222
|
+
:ms_level => 2,
|
223
|
+
:time => 5099.69,
|
224
|
+
:precursors => [MS::Precursor.new(:mz => 357.0411987, :intensity => 643017.0)]
|
225
|
+
}
|
226
|
+
info.scan_count0 = info.scan_count
|
227
|
+
info.scan_count1 = 905
|
228
|
+
info.scan_count2 = 2715
|
229
|
+
info.start_and_end_mz1 = [300.0, 1500.0]
|
230
|
+
# that first number on start_and_end_mz2 is arbitrary as to accuracy...
|
231
|
+
# I'm not sure of the correct answer
|
232
|
+
info.start_and_end_mz2 = [110.0, 2000.0]
|
233
|
+
end
|
234
|
+
|
235
|
+
describe MS::MSRun, "on mzXML version 2.0 files (w/o spectra)" do
|
236
|
+
spec_large do
|
237
|
+
before(:all) do
|
238
|
+
@info = MzXML_version_20_info
|
239
|
+
start = Time.now
|
240
|
+
@run = @info.klass.new(@info.file, :spectra => false)
|
241
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
242
|
+
end
|
243
|
+
|
244
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
245
|
+
it_should_behave_like 'a basic scan info generator'
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
describe MS::MSRun, "on mzXML version 2.0 files (w/spectra)" do
|
250
|
+
spec_large do
|
251
|
+
before(:all) do
|
252
|
+
@info = MzXML_version_20_info
|
253
|
+
start = Time.now
|
254
|
+
@run = @info.klass.new(@info.file)
|
255
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
256
|
+
end
|
257
|
+
|
258
|
+
it_should_behave_like "an msrun with spectrum"
|
259
|
+
it_should_behave_like 'a basic scan info generator'
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
Mzdata_105_info = MyOpenStruct.new do |info|
|
264
|
+
info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.mzData.xml'
|
265
|
+
info.klass = MS::MSRun
|
266
|
+
info.filetype = :mzdata
|
267
|
+
info.version = '1.05'
|
268
|
+
info.scan_count = 3619 # this should be 3620, they drop the last scan
|
269
|
+
info.start_time = 0.13002 # minutes == 0.00216667
|
270
|
+
# This is the correct one!, but Thermo drops last scan
|
271
|
+
# info.end_time = 5099.688 #84.9948
|
272
|
+
info.end_time = 84.968500*60 # 5098.11
|
273
|
+
|
274
|
+
info.num_to_prec_mz_hash = {
|
275
|
+
0 => nil,
|
276
|
+
1 => nil,
|
277
|
+
2 => 390.9291992,
|
278
|
+
3 => 1121.944824,
|
279
|
+
4 => 1321.913574,
|
280
|
+
3617 => nil,
|
281
|
+
3618 => 828.2867432,
|
282
|
+
3619 => 424.8538208,
|
283
|
+
# 3620 => 357.0411987, Bioworks 3.3 is broken
|
284
|
+
}
|
285
|
+
|
286
|
+
info.scans = {}
|
287
|
+
info.scans[0] = {
|
288
|
+
:num => 1,
|
289
|
+
:ms_level => 1,
|
290
|
+
:time => 0.13002, # a little rounding error coming from minutes
|
291
|
+
}
|
292
|
+
info.scans[1] = {
|
293
|
+
:num => 2,
|
294
|
+
:ms_level => 2,
|
295
|
+
:time => 0.024833 * 60, # 1.48998
|
296
|
+
:precursors => [MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006) ],
|
297
|
+
}
|
298
|
+
info.scans[-1] = {
|
299
|
+
:num => 3619,
|
300
|
+
:ms_level => 2,
|
301
|
+
#:time => 5099.69,
|
302
|
+
:time => 84.968500 * 60, # 5098.11
|
303
|
+
|
304
|
+
:precursors => [MS::Precursor.new( :mz => 424.853821, :intensity => 738590.0 )] # wrong
|
305
|
+
}
|
306
|
+
info.scan_count0 = info.scan_count
|
307
|
+
info.scan_count1 = 905
|
308
|
+
info.scan_count2 = 2714 # should be 2715, they dropped the last scan!
|
309
|
+
info.start_and_end_mz1 = [300.0, 1500.0]
|
310
|
+
# This is the Correct one!!!, but Thermo drops last scan
|
311
|
+
#info.start_and_end_mz2 = [112.0, 2000.0]
|
312
|
+
info.start_and_end_mz2 = [95.0, 2000.0]
|
313
|
+
end
|
314
|
+
|
315
|
+
describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/o spectra)" do
|
316
|
+
spec_large do
|
317
|
+
before(:all) do
|
318
|
+
@info = Mzdata_105_info
|
319
|
+
start = Time.now
|
320
|
+
@run = @info.klass.new(@info.file, :spectra => false)
|
321
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
322
|
+
puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
|
323
|
+
end
|
324
|
+
|
325
|
+
it_should_behave_like "an msrun with basic, non-spectral information"
|
326
|
+
it_should_behave_like 'a basic scan info generator'
|
327
|
+
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/spectra)" do
|
332
|
+
spec_large do
|
333
|
+
before(:all) do
|
334
|
+
@info = Mzdata_105_info
|
335
|
+
start = Time.now
|
336
|
+
@run = @info.klass.new(@info.file)
|
337
|
+
puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
|
338
|
+
puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
|
339
|
+
end
|
340
|
+
|
341
|
+
it_should_behave_like "an msrun with spectrum"
|
342
|
+
it_should_behave_like 'a basic scan info generator'
|
343
|
+
|
344
|
+
it 'gets correct precursor intensities for all scans' do
|
345
|
+
check_file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
346
|
+
prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
|
347
|
+
if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
|
348
|
+
[$1.to_f, $2.to_f]
|
349
|
+
else
|
350
|
+
abort "didn't match for some crazy reason! (probably newline issues)"
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
prec_mz_cnt = 0
|
355
|
+
@run.scans.each_with_index do |scan,i|
|
356
|
+
next if i % 4 == 0
|
357
|
+
(exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
|
358
|
+
|
359
|
+
precursor = scan.precursors.first
|
360
|
+
precursor.mz.should be_close(exp_mz, 0.00001)
|
361
|
+
precursor.intensity.should be_close(exp_int, 51)
|
362
|
+
|
363
|
+
prec_mz_cnt += 1
|
364
|
+
end
|
365
|
+
end
|
366
|
+
end
|
367
|
+
end
|
368
|
+
|
369
|
+
describe MS::MSRun, 'with small file of twenty scans' do
|
370
|
+
before(:each) do
|
371
|
+
@file = Tfiles + "/opd1/twenty_scans.mzXML"
|
372
|
+
@msrun = MS::MSRun.new(@file)
|
373
|
+
end
|
374
|
+
|
375
|
+
it 'retrieves times and spectra' do
|
376
|
+
(times, spectra) = @msrun.times_and_spectra(1)
|
377
|
+
etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
|
378
|
+
num_peaks = [992, 814, 796, 849, 813]
|
379
|
+
tol = 0.000000001
|
380
|
+
spectra[0].mz[1].should be_close(301.430114746094, tol)
|
381
|
+
spectra[0].intensity[1].should be_close(22192.0, tol)
|
382
|
+
spectra[0].mz[-1].should be_close(1499.09912109375, tol)
|
383
|
+
spectra[0].intensity[-1].should be_close(111286.0, tol)
|
384
|
+
|
385
|
+
spectra[-1].mz[1].should be_close(301.243774414062, tol)
|
386
|
+
spectra[-1].intensity[1].should be_close(77503.0, tol)
|
387
|
+
spectra[-1].mz[-1].should be_close(1499.42016601562, tol)
|
388
|
+
spectra[-1].intensity[-1].should be_close(13.0, tol)
|
389
|
+
|
390
|
+
num_peaks.each_with_index do |n,i|
|
391
|
+
spectra[i].mz.size.should == n
|
392
|
+
end
|
393
|
+
etimes.each_with_index do |t,i|
|
394
|
+
times[i].should be_close(t, 0.00001)
|
395
|
+
end
|
396
|
+
end
|
397
|
+
end
|
398
|
+
|
399
|
+
describe MS::MSRun, 'with a small set of scans' do
|
400
|
+
it 'can add parent scans' do
|
401
|
+
vals = [
|
402
|
+
[1,1,0.13],
|
403
|
+
[2,2,0.23],
|
404
|
+
[3,2,0.33],
|
405
|
+
[4,3,0.43],
|
406
|
+
[5,3,0.53],
|
407
|
+
[6,1,0.63],
|
408
|
+
[7,2,0.73],
|
409
|
+
[8,3,0.83],
|
410
|
+
[9,2,0.93]
|
411
|
+
]
|
412
|
+
precs = (0..(vals.size)).to_a.map do |x|
|
413
|
+
MS::Precursor.new([x,100])
|
414
|
+
end
|
415
|
+
scans = vals.zip(precs).map do |ar,prec|
|
416
|
+
scan = MS::Scan.new(ar)
|
417
|
+
scan.precursors = [prec]
|
418
|
+
scan
|
419
|
+
end
|
420
|
+
scans.size.should == vals.size
|
421
|
+
s = scans
|
422
|
+
parents = [nil,s[0],s[0],s[2],s[2],nil,s[5],s[6],s[5]]
|
423
|
+
MS::MSRun.add_parent_scan(scans)
|
424
|
+
scans.each_with_index do |scan,i|
|
425
|
+
scan.precursors.first.parent.should == parents[i]
|
426
|
+
end
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
=begin
|
431
|
+
###################################################
|
432
|
+
# SHOULD IMPLEMENT BASIC INFO FOR ALL FILE TYPES
|
433
|
+
###################################################
|
434
|
+
|
435
|
+
require 'test/unit'
|
436
|
+
require 'ms/mzxml/parser'
|
437
|
+
|
438
|
+
class MSMzXML < Test::Unit::TestCase
|
439
|
+
def initialize(arg)
|
440
|
+
super(arg)
|
441
|
+
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
442
|
+
@tscans = @tfiles + "opd1/twenty_scans.mzXML"
|
443
|
+
@big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
|
444
|
+
end
|
445
|
+
|
446
|
+
def test_basic_info
|
447
|
+
hash = MS::MzXML::Parser.new.basic_info(@tscans)
|
448
|
+
assert_equal({:scan_count=>[20, 5, 15], :start_time=>0.44, :end_time=>27.05, :start_mz=>300.0, :end_mz=>1500.0, :ms_level=>1}, hash, "basic info the same")
|
449
|
+
end
|
450
|
+
|
451
|
+
end
|
452
|
+
|
453
|
+
=end
|
454
|
+
|
455
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
|
2
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
3
|
+
require 'ms/parser'
|
4
|
+
|
5
|
+
describe "a MS::Parser on a file", :shared => true do
|
6
|
+
it 'finds filetype and version on file and handle' do
|
7
|
+
ft_version = nil
|
8
|
+
File.open(@file) do |fh|
|
9
|
+
ft_version = MS::Parser.filetype_and_version(fh)
|
10
|
+
end
|
11
|
+
ft_version.should == @filetype_version
|
12
|
+
ft_version = MS::Parser.filetype_and_version(@file)
|
13
|
+
ft_version.should == @filetype_version
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'creates a sub-classed parser responding to "msrun"' do
|
17
|
+
parser = MS::Parser.new(@file, :msrun)
|
18
|
+
parser.class.to_s.should match(/^MS::Parser::/)
|
19
|
+
parser.class.to_s.should match(Regexp.new(Regexp.escape(@subclass)))
|
20
|
+
parser.respond_to?(:msrun).should be_true
|
21
|
+
end
|
22
|
+
|
23
|
+
########################################################################
|
24
|
+
# NOTE: methods to verify parsing of information should be defined where
|
25
|
+
# that information is required.
|
26
|
+
# e.g. msrun_spec.rb will verify that msrun objects are created properly.
|
27
|
+
# this is because we don't care how we get that file, just that we get it.
|
28
|
+
# The whole process of parsing a file should be transparent to users.
|
29
|
+
########################################################################
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
describe MS::Parser, "on a RAW file (Xcalibur 1.3 SP 1)" do
|
34
|
+
spec_large do
|
35
|
+
before(:all) do
|
36
|
+
@filetype = :raw
|
37
|
+
@version = nil
|
38
|
+
@filetype_version = [@filetype, @version]
|
39
|
+
@file = Tfiles_large + '/opd1_2runs_2mods/data/020.RAW'
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'finds filetype (NO version yet!) on file and handle' do
|
43
|
+
ft_version = nil
|
44
|
+
File.open(@file) do |fh|
|
45
|
+
ft_version = MS::Parser.filetype_and_version(fh)
|
46
|
+
end
|
47
|
+
ft_version.should == @filetype_version
|
48
|
+
ft_version = MS::Parser.filetype_and_version(@file)
|
49
|
+
ft_version.should == @filetype_version
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe MS::Parser, "on an mzXML version 1 file" do
|
55
|
+
spec_large do
|
56
|
+
before(:all) do
|
57
|
+
@filetype = :mzxml
|
58
|
+
@version = '1.0'
|
59
|
+
@filetype_version = [@filetype, @version]
|
60
|
+
@subclass = 'MS::Parser::MzXML'
|
61
|
+
@file = Tfiles_large + '/yeast_gly_mzXML/000.mzXML'
|
62
|
+
end
|
63
|
+
it_should_behave_like "a MS::Parser on a file"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
describe MS::Parser, "on an mzXML version 2 file" do
|
68
|
+
spec_large do
|
69
|
+
before(:all) do
|
70
|
+
@filetype = :mzxml
|
71
|
+
@version = '2.0'
|
72
|
+
@filetype_version = [@filetype, @version]
|
73
|
+
@subclass = 'MS::Parser::MzXML'
|
74
|
+
@file = Tfiles_large + '/opd1_2runs_2mods/data/020.readw.mzXML'
|
75
|
+
end
|
76
|
+
it_should_behave_like "a MS::Parser on a file"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe MS::Parser, "on an mzData version 1.05 file" do
|
81
|
+
spec_large do
|
82
|
+
before(:all) do
|
83
|
+
@filetype = :mzdata
|
84
|
+
@version = '1.05'
|
85
|
+
@filetype_version = [@filetype, @version]
|
86
|
+
@subclass = 'MS::Parser::MzData'
|
87
|
+
@file = Tfiles_large + '/opd1_2runs_2mods/data/020.mzData.xml'
|
88
|
+
end
|
89
|
+
it_should_behave_like "a MS::Parser on a file"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|