mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/specs/gi_spec.rb ADDED
@@ -0,0 +1,22 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper'
2
  )
3
+ require 'gi'
4
+
5
+
6
+ describe GI, "given a 'GI' number" do
7
+ before(:all) do
8
+ @gi_num = 836805
9
+ end
10
+ it 'can query NCBI for annotation (fails nicely w/o connection)' do
11
+ annot = GI.gi2annot([@gi_num])
12
+ if annot
13
+ annot.first.should == 'proteosome component PRE4 [Saccharomyces cerevisiae]'
14
+ else
15
+ puts "- retrieval of gi failed gracefully w/o internet connection"
16
+ end
17
+ end
18
+
19
+ end
20
+
21
+
22
+
23
+
@@ -0,0 +1,7 @@
1
+ tmp = $VERBOSE ; $VERBOSE = nil
2
+ LOAD_BIN_PATH = File.expand_path(File.dirname(__FILE__) + "#{File::SEPARATOR}..#{File::SEPARATOR}bin")
3
+ $VERBOSE = tmp
4
+
5
+ if ENV.key?("PATH")
6
+ ENV["PATH"] = LOAD_BIN_PATH + File::PATH_SEPARATOR + ENV["PATH"]
7
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+ require 'merge_deep'
3
+
4
+ describe 'merging one level deep' do
5
+ it 'works' do
6
+ base = {1=>"X", 3=>{6=>7, 8=>9}}
7
+ another = {1=>'y', 3=>{6=>9}}
8
+ ans = base.merge_deep(another, 1)
9
+ ans.should == {1=>'y', 3=>{6=>9, 8=>9}}
10
+ end
11
+ end
12
+
13
+
@@ -0,0 +1,77 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require 'ms/gradient_program'
3
+
4
+ describe GradientProgram do
5
+ it 'can be set from a Thermo Xcal 2.X .meth file' do
6
+ data = [
7
+ [0.00, 95.0, 5.0, 0.0, 0.0, 38.0],
8
+ [1.00, 90.0, 10.0, 0.0, 0.0, 38.0],
9
+ [30.00, 85.0, 15.0, 0.0, 0.0, 38.0],
10
+ [40.00, 80.0, 20.0, 0.0, 0.0, 38.0],
11
+ [45.00, 78.0, 22.0, 0.0, 0.0, 38.0],
12
+ [50.00, 72.0, 28.0, 0.0, 0.0, 38.0],
13
+ [65.00, 60.0, 40.0, 0.0, 0.0, 38.0],
14
+ [72.00, 10.0, 90.0, 0.0, 0.0, 38.0],
15
+ [75.0, 10.0, 90.0, 0.0, 0.0, 38.0],
16
+ [81.00, 10.0, 90.0, 0.0, 0.0, 38.0],
17
+ [81.10, 95.0, 5.0, 0.0, 0.0, 38.0],
18
+ [90.00, 95.0, 5.0, 0.0, 0.0, 38.0],
19
+ ]
20
+
21
+ ms_pump_expected_tps = data.map do |ar|
22
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
23
+ end
24
+ ms_pump_expected = GradientProgram.new('MS Pump', ms_pump_expected_tps, %w(A B C D))
25
+
26
+ data = [
27
+ [0.00, 0.0, 0.0, 100.0, 0.0, 40.0],
28
+ [90.0, 0.0, 0.0, 100.0, 0.0, 40.0],
29
+ ]
30
+ sample_pump_expected_tps = data.map {|ar| GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4]) }
31
+ sample_pump_expected = GradientProgram.new('Sample Pump', sample_pump_expected_tps, %w(A B C D))
32
+
33
+ file = Tfiles + '/s01_anC1_ld020mM.meth'
34
+ File.open(file) do |fh|
35
+ gps = GradientProgram.all_from_handle(fh)
36
+ gps[0].should == ms_pump_expected
37
+ gps[1].should == sample_pump_expected
38
+ end
39
+ end
40
+
41
+ it 'can be set from a Thermo Xcal 1.X .RAW file (but missing pump_type)' do
42
+ file = Tfiles + '/opd1_020_beginning.RAW'
43
+ data = [[0.0, 0.0, 0.0, 100.0, 0.0, 200.0],
44
+ [1.0, 0.0, 0.0, 96.0, 4.0, 200.0],
45
+ [10.0, 0.0, 0.0, 96.0, 4.0, 200.0],
46
+ [11.0, 0.0, 0.0, 100.0, 0.0, 200.0],
47
+ [85.0, 0.0, 0.0, 100.0, 0.0, 200.0],]
48
+
49
+ time_points = data.map do |ar|
50
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
51
+ end
52
+ pump_type = '' ## need to get pump type...
53
+ ms_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
54
+
55
+ data = [[0.0, 95.0, 5.0, 0.0, 0.0, 200.0],
56
+ [1.0, 95.0, 5.0, 0.0, 0.0, 200.0],
57
+ [61.0, 55.0, 45.0, 0.0, 0.0, 200.0],
58
+ [62.0, 5.0, 95.0, 0.0, 0.0, 200.0],
59
+ [67.0, 5.0, 95.0, 0.0, 0.0, 200.0],
60
+ [68.0, 95.0, 5.0, 0.0, 0.0, 200.0],
61
+ [85.0, 95.0, 5.0, 0.0, 0.0, 200.0],]
62
+ time_points = data.map do |ar|
63
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
64
+ end
65
+ pump_type = '' ## need to get pump type...
66
+ sample_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
67
+
68
+ # we'd like to get an older .meth file to do this on
69
+ File.open(file) do |fh|
70
+ gps = GradientProgram.all_from_handle(fh)
71
+ gps[0].should == ms_pump_expected
72
+ gps[1].should == sample_pump_expected
73
+
74
+ end
75
+ end
76
+
77
+ end
@@ -0,0 +1,455 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require 'ms/msrun'
4
+ require 'ostruct'
5
+
6
+ parsers = %w(AXML LibXML XMLParser Regexp REXML)
7
+
8
+ XMLStyleParser::Parser_precedence.replace( %w(AXML) )
9
+
10
+
11
+ describe "an msrun with basic, non-spectral information", :shared => true do
12
+ it 'knows the type and version of file' do
13
+ @run.filetype.should == @info.filetype
14
+ @run.version.should == @info.version
15
+ end
16
+
17
+ it 'knows basic run information' do
18
+ @run.scan_count.should == @info.scan_count
19
+ @run.start_time.should == @info.start_time
20
+ @run.end_time.should == @info.end_time
21
+ end
22
+
23
+ it 'has all scans' do
24
+ @run.scans.size.should == @info.scan_count
25
+ @run.scans.each_with_index do |sc,i|
26
+ sc.class.should == MS::Scan
27
+ end
28
+ end
29
+
30
+ it 'can determine scan counts for any mslevel' do
31
+ @run.scan_counts.class.should == Array
32
+ @run.scan_count(0).should == @info.scan_count0
33
+ @run.scan_count(1).should == @info.scan_count1
34
+ @run.scan_count(2).should == @info.scan_count2
35
+ end
36
+
37
+ it 'has correct first two scans and last scan' do
38
+ [0,1,-1].each do |i|
39
+ @info.scans[i].each do |k,v|
40
+ if k == :precursors
41
+ v.zip( @run.scans[i].send(k) ) do |exp, act|
42
+ act.mz.should be_close(exp.mz, 0.000001)
43
+ #if act.intensity # intensity not guaranteed to exist!
44
+ # act.intensity.should == exp.intensity
45
+ #end
46
+ end
47
+ else
48
+ @run.scans[i].send(k).should == v
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
54
+
55
+ describe "an msrun with spectrum", :shared => true do
56
+
57
+ it 'has all scans with spectrum data' do
58
+ @run.scans.size.should == @info.scan_count
59
+ @run.scans.each_with_index do |sc,i|
60
+ sc.class.should == MS::Scan
61
+ sc.spectrum.should have_mz_data
62
+ sc.spectrum.should have_intensity_data
63
+ end
64
+ end
65
+
66
+ it 'can determine start_and_end_mz' do
67
+ @run.start_and_end_mz(1).should == @info.start_and_end_mz1
68
+ @run.start_and_end_mz(2).should == @info.start_and_end_mz2
69
+ end
70
+
71
+ it "has correct prec inten for first two scans and last scan" do
72
+ [0,1,-1].each do |i|
73
+ if i == 0
74
+ # currently we do different things for ms_level 1 scans! is it nil or []?
75
+ #@run.scans[i].precursors.should == []
76
+ #@run.scans[i].precursors.should be_nil
77
+ next
78
+ end
79
+ expected = @info.scans[i][:precursors]
80
+ @run.scans[i].precursors.zip(expected) do |act,exp|
81
+ act.mz.should be_close(exp.mz, 0.000001)
82
+ act.intensity.should == exp.intensity
83
+ end
84
+ end
85
+ end
86
+
87
+ it_should_behave_like "an msrun with basic, non-spectral information"
88
+ end
89
+
90
+ describe 'a basic scan info generator', :shared => true do
91
+
92
+ def check_table(table, answer)
93
+ answer.each do |k,v|
94
+ if v == nil
95
+ table[k].should be_nil
96
+ else
97
+ table[k].should be_close(v, 0.000001)
98
+ end
99
+ end
100
+ end
101
+
102
+ it 'generates precursor_mz_by_scan_num lookup table' do
103
+ ar = @run.precursor_mz_by_scan_num
104
+ check_table(ar, @info.num_to_prec_mz_hash)
105
+ end
106
+
107
+ it 'class method -> precursor_mz_by_scan_num (with file)' do
108
+ ar = @info.klass.precursor_mz_by_scan_num(@info.file)
109
+ check_table(ar, @info.num_to_prec_mz_hash)
110
+ end
111
+ end
112
+
113
+ MzXML_version_1_info = MyOpenStruct.new do |info|
114
+ info.file = Tfiles_l + '/yeast_gly_mzXML/000.mzXML'
115
+ info.klass = MS::MSRun
116
+ info.filetype = :mzxml
117
+ info.version = '1.0'
118
+ info.scan_count = 3748
119
+ #info.scan_counts = [3748, 937, nil] ## need to get ms2
120
+ info.start_time = 0.44
121
+ info.end_time = 5102.55
122
+ info.num_to_prec_mz_hash = {
123
+ 0 => nil,
124
+ 1 => nil,
125
+ 2 => 391.045410,
126
+ 3 => 446.009033,
127
+ 4 => 1222.033203,
128
+ 5 => nil,
129
+ 6 => 390.947449,
130
+ 3744 => 338.779114,
131
+ 3745 => nil,
132
+ 3746 => 304.136597,
133
+ 3748 => 433.564941,
134
+ }
135
+ info.scans = {}
136
+
137
+ info.scans[0] = {
138
+ :num => 1,
139
+ :ms_level => 1,
140
+ :time => 0.440,
141
+ }
142
+ info.scans[1] = {
143
+ :num => 2,
144
+ :ms_level => 2,
145
+ :time => 1.90,
146
+ :precursors => [MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)]
147
+ }
148
+ info.scans[-1] = {
149
+ :num => 3748,
150
+ :ms_level => 2,
151
+ :time => 5102.55,
152
+ :precursors => [MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)]
153
+ }
154
+ info.scan_count0 = info.scan_count
155
+ info.scan_count1 = 937
156
+ info.scan_count2 = 2811
157
+ info.start_and_end_mz1 = [300.0, 1500.0]
158
+ info.start_and_end_mz2 = [0.0, 2000.0]
159
+ end
160
+
161
+ describe MS::MSRun, "on mzXML version 1 files (w/o spectra)" do
162
+ spec_large do
163
+ before(:all) do
164
+ @info = MzXML_version_1_info
165
+ start = Time.now
166
+ @run = @info.klass.new(@info.file, :spectra => false)
167
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
168
+ end
169
+ it_should_behave_like "an msrun with basic, non-spectral information"
170
+ it_should_behave_like 'a basic scan info generator'
171
+ end
172
+ end
173
+
174
+ describe MS::MSRun, "on mzXML version 1 files (w/spectra)" do
175
+ spec_large do
176
+ before(:all) do
177
+ @info = MzXML_version_1_info
178
+ start = Time.now
179
+ @run = @info.klass.new(@info.file)
180
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
181
+ end
182
+
183
+ it_should_behave_like "an msrun with spectrum"
184
+ it_should_behave_like 'a basic scan info generator'
185
+ end
186
+ end
187
+
188
+ MzXML_version_20_info = MyOpenStruct.new do |info|
189
+ info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
190
+ info.klass = MS::MSRun
191
+ info.filetype = :mzxml
192
+ info.version = '2.0'
193
+ info.scan_count = 3620
194
+ #info.scan_counts = ??
195
+ info.start_time = 0.13
196
+ info.end_time = 5099.69
197
+ info.num_to_prec_mz_hash = {
198
+ 0 => nil,
199
+ 1 => nil,
200
+ 2 => 390.9291992,
201
+ 3 => 1121.944824,
202
+ 4 => 1321.913574,
203
+ 3617 => nil,
204
+ 3618 => 828.2867432,
205
+ 3619 => 424.8538208,
206
+ 3620 => 357.0411987,
207
+ }
208
+ info.scans = {}
209
+ info.scans[0]= {
210
+ :num => 1,
211
+ :ms_level => 1,
212
+ :time => 0.13,
213
+ }
214
+ info.scans[1] = {
215
+ :num => 2,
216
+ :ms_level => 2,
217
+ :time => 1.49,
218
+ :precursors => [MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)]
219
+ }
220
+ info.scans[-1] = {
221
+ :num => 3620,
222
+ :ms_level => 2,
223
+ :time => 5099.69,
224
+ :precursors => [MS::Precursor.new(:mz => 357.0411987, :intensity => 643017.0)]
225
+ }
226
+ info.scan_count0 = info.scan_count
227
+ info.scan_count1 = 905
228
+ info.scan_count2 = 2715
229
+ info.start_and_end_mz1 = [300.0, 1500.0]
230
+ # the first number in start_and_end_mz2 is arbitrary as to accuracy...
231
+ # I'm not sure of the correct answer
232
+ info.start_and_end_mz2 = [110.0, 2000.0]
233
+ end
234
+
235
+ describe MS::MSRun, "on mzXML version 2.0 files (w/o spectra)" do
236
+ spec_large do
237
+ before(:all) do
238
+ @info = MzXML_version_20_info
239
+ start = Time.now
240
+ @run = @info.klass.new(@info.file, :spectra => false)
241
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
242
+ end
243
+
244
+ it_should_behave_like "an msrun with basic, non-spectral information"
245
+ it_should_behave_like 'a basic scan info generator'
246
+ end
247
+ end
248
+
249
+ describe MS::MSRun, "on mzXML version 2.0 files (w/spectra)" do
250
+ spec_large do
251
+ before(:all) do
252
+ @info = MzXML_version_20_info
253
+ start = Time.now
254
+ @run = @info.klass.new(@info.file)
255
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
256
+ end
257
+
258
+ it_should_behave_like "an msrun with spectrum"
259
+ it_should_behave_like 'a basic scan info generator'
260
+ end
261
+ end
262
+
263
+ Mzdata_105_info = MyOpenStruct.new do |info|
264
+ info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.mzData.xml'
265
+ info.klass = MS::MSRun
266
+ info.filetype = :mzdata
267
+ info.version = '1.05'
268
+ info.scan_count = 3619 # this should be 3620, they drop the last scan
269
+ info.start_time = 0.13002 # minutes == 0.00216667
270
+ # This is the correct one, but Thermo drops the last scan!
271
+ # info.end_time = 5099.688 #84.9948
272
+ info.end_time = 84.968500*60 # 5098.11
273
+
274
+ info.num_to_prec_mz_hash = {
275
+ 0 => nil,
276
+ 1 => nil,
277
+ 2 => 390.9291992,
278
+ 3 => 1121.944824,
279
+ 4 => 1321.913574,
280
+ 3617 => nil,
281
+ 3618 => 828.2867432,
282
+ 3619 => 424.8538208,
283
+ # 3620 => 357.0411987, Bioworks 3.3 is broken
284
+ }
285
+
286
+ info.scans = {}
287
+ info.scans[0] = {
288
+ :num => 1,
289
+ :ms_level => 1,
290
+ :time => 0.13002, # a little rounding error coming from minutes
291
+ }
292
+ info.scans[1] = {
293
+ :num => 2,
294
+ :ms_level => 2,
295
+ :time => 0.024833 * 60, # 1.48998
296
+ :precursors => [MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006) ],
297
+ }
298
+ info.scans[-1] = {
299
+ :num => 3619,
300
+ :ms_level => 2,
301
+ #:time => 5099.69,
302
+ :time => 84.968500 * 60, # 5098.11
303
+
304
+ :precursors => [MS::Precursor.new( :mz => 424.853821, :intensity => 738590.0 )] # wrong
305
+ }
306
+ info.scan_count0 = info.scan_count
307
+ info.scan_count1 = 905
308
+ info.scan_count2 = 2714 # should be 2715, they dropped the last scan!
309
+ info.start_and_end_mz1 = [300.0, 1500.0]
310
+ # This is the correct one, but Thermo drops the last scan!
311
+ #info.start_and_end_mz2 = [112.0, 2000.0]
312
+ info.start_and_end_mz2 = [95.0, 2000.0]
313
+ end
314
+
315
+ describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/o spectra)" do
316
+ spec_large do
317
+ before(:all) do
318
+ @info = Mzdata_105_info
319
+ start = Time.now
320
+ @run = @info.klass.new(@info.file, :spectra => false)
321
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
322
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
323
+ end
324
+
325
+ it_should_behave_like "an msrun with basic, non-spectral information"
326
+ it_should_behave_like 'a basic scan info generator'
327
+
328
+ end
329
+ end
330
+
331
+ describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/spectra)" do
332
+ spec_large do
333
+ before(:all) do
334
+ @info = Mzdata_105_info
335
+ start = Time.now
336
+ @run = @info.klass.new(@info.file)
337
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
338
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
339
+ end
340
+
341
+ it_should_behave_like "an msrun with spectrum"
342
+ it_should_behave_like 'a basic scan info generator'
343
+
344
+ it 'gets correct precursor intensities for all scans' do
345
+ check_file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
346
+ prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
347
+ if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
348
+ [$1.to_f, $2.to_f]
349
+ else
350
+ abort "didn't match for some crazy reason! (probably newline issues)"
351
+ end
352
+ end
353
+
354
+ prec_mz_cnt = 0
355
+ @run.scans.each_with_index do |scan,i|
356
+ next if i % 4 == 0
357
+ (exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
358
+
359
+ precursor = scan.precursors.first
360
+ precursor.mz.should be_close(exp_mz, 0.00001)
361
+ precursor.intensity.should be_close(exp_int, 51)
362
+
363
+ prec_mz_cnt += 1
364
+ end
365
+ end
366
+ end
367
+ end
368
+
369
+ describe MS::MSRun, 'with small file of twenty scans' do
370
+ before(:each) do
371
+ @file = Tfiles + "/opd1/twenty_scans.mzXML"
372
+ @msrun = MS::MSRun.new(@file)
373
+ end
374
+
375
+ it 'retrieves times and spectra' do
376
+ (times, spectra) = @msrun.times_and_spectra(1)
377
+ etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
378
+ num_peaks = [992, 814, 796, 849, 813]
379
+ tol = 0.000000001
380
+ spectra[0].mz[1].should be_close(301.430114746094, tol)
381
+ spectra[0].intensity[1].should be_close(22192.0, tol)
382
+ spectra[0].mz[-1].should be_close(1499.09912109375, tol)
383
+ spectra[0].intensity[-1].should be_close(111286.0, tol)
384
+
385
+ spectra[-1].mz[1].should be_close(301.243774414062, tol)
386
+ spectra[-1].intensity[1].should be_close(77503.0, tol)
387
+ spectra[-1].mz[-1].should be_close(1499.42016601562, tol)
388
+ spectra[-1].intensity[-1].should be_close(13.0, tol)
389
+
390
+ num_peaks.each_with_index do |n,i|
391
+ spectra[i].mz.size.should == n
392
+ end
393
+ etimes.each_with_index do |t,i|
394
+ times[i].should be_close(t, 0.00001)
395
+ end
396
+ end
397
+ end
398
+
399
+ describe MS::MSRun, 'with a small set of scans' do
400
+ it 'can add parent scans' do
401
+ vals = [
402
+ [1,1,0.13],
403
+ [2,2,0.23],
404
+ [3,2,0.33],
405
+ [4,3,0.43],
406
+ [5,3,0.53],
407
+ [6,1,0.63],
408
+ [7,2,0.73],
409
+ [8,3,0.83],
410
+ [9,2,0.93]
411
+ ]
412
+ precs = (0..(vals.size)).to_a.map do |x|
413
+ MS::Precursor.new([x,100])
414
+ end
415
+ scans = vals.zip(precs).map do |ar,prec|
416
+ scan = MS::Scan.new(ar)
417
+ scan.precursors = [prec]
418
+ scan
419
+ end
420
+ scans.size.should == vals.size
421
+ s = scans
422
+ parents = [nil,s[0],s[0],s[2],s[2],nil,s[5],s[6],s[5]]
423
+ MS::MSRun.add_parent_scan(scans)
424
+ scans.each_with_index do |scan,i|
425
+ scan.precursors.first.parent.should == parents[i]
426
+ end
427
+ end
428
+ end
429
+
430
+ =begin
431
+ ###################################################
432
+ # SHOULD IMPLEMENT BASIC INFO FOR ALL FILE TYPES
433
+ ###################################################
434
+
435
+ require 'test/unit'
436
+ require 'ms/mzxml/parser'
437
+
438
+ class MSMzXML < Test::Unit::TestCase
439
+ def initialize(arg)
440
+ super(arg)
441
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
442
+ @tscans = @tfiles + "opd1/twenty_scans.mzXML"
443
+ @big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
444
+ end
445
+
446
+ def test_basic_info
447
+ hash = MS::MzXML::Parser.new.basic_info(@tscans)
448
+ assert_equal({:scan_count=>[20, 5, 15], :start_time=>0.44, :end_time=>27.05, :start_mz=>300.0, :end_mz=>1500.0, :ms_level=>1}, hash, "basic info the same")
449
+ end
450
+
451
+ end
452
+
453
+ =end
454
+
455
+
@@ -0,0 +1,92 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require 'ms/parser'
4
+
5
+ describe "a MS::Parser on a file", :shared => true do
6
+ it 'finds filetype and version on file and handle' do
7
+ ft_version = nil
8
+ File.open(@file) do |fh|
9
+ ft_version = MS::Parser.filetype_and_version(fh)
10
+ end
11
+ ft_version.should == @filetype_version
12
+ ft_version = MS::Parser.filetype_and_version(@file)
13
+ ft_version.should == @filetype_version
14
+ end
15
+
16
+ it 'creates a sub-classed parser responding to "msrun"' do
17
+ parser = MS::Parser.new(@file, :msrun)
18
+ parser.class.to_s.should match(/^MS::Parser::/)
19
+ parser.class.to_s.should match(Regexp.new(Regexp.escape(@subclass)))
20
+ parser.respond_to?(:msrun).should be_true
21
+ end
22
+
23
+ ########################################################################
24
+ # NOTE: methods to verify parsing of information should be defined where
25
+ # that information is required.
26
+ # e.g. msrun_spec.rb will verify that msrun objects are created properly.
27
+ # this is because we don't care how we get that file, just that we get it.
28
+ # The whole process of parsing a file should be transparent to users.
29
+ ########################################################################
30
+
31
+ end
32
+
33
+ describe MS::Parser, "on a RAW file (Xcalibur 1.3 SP 1)" do
34
+ spec_large do
35
+ before(:all) do
36
+ @filetype = :raw
37
+ @version = nil
38
+ @filetype_version = [@filetype, @version]
39
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.RAW'
40
+ end
41
+
42
+ it 'finds filetype (NO version yet!) on file and handle' do
43
+ ft_version = nil
44
+ File.open(@file) do |fh|
45
+ ft_version = MS::Parser.filetype_and_version(fh)
46
+ end
47
+ ft_version.should == @filetype_version
48
+ ft_version = MS::Parser.filetype_and_version(@file)
49
+ ft_version.should == @filetype_version
50
+ end
51
+ end
52
+ end
53
+
54
+ describe MS::Parser, "on an mzXML version 1 file" do
55
+ spec_large do
56
+ before(:all) do
57
+ @filetype = :mzxml
58
+ @version = '1.0'
59
+ @filetype_version = [@filetype, @version]
60
+ @subclass = 'MS::Parser::MzXML'
61
+ @file = Tfiles_large + '/yeast_gly_mzXML/000.mzXML'
62
+ end
63
+ it_should_behave_like "a MS::Parser on a file"
64
+ end
65
+ end
66
+
67
+ describe MS::Parser, "on an mzXML version 2 file" do
68
+ spec_large do
69
+ before(:all) do
70
+ @filetype = :mzxml
71
+ @version = '2.0'
72
+ @filetype_version = [@filetype, @version]
73
+ @subclass = 'MS::Parser::MzXML'
74
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.readw.mzXML'
75
+ end
76
+ it_should_behave_like "a MS::Parser on a file"
77
+ end
78
+ end
79
+
80
+ describe MS::Parser, "on an mzData version 1.05 file" do
81
+ spec_large do
82
+ before(:all) do
83
+ @filetype = :mzdata
84
+ @version = '1.05'
85
+ @filetype_version = [@filetype, @version]
86
+ @subclass = 'MS::Parser::MzData'
87
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.mzData.xml'
88
+ end
89
+ it_should_behave_like "a MS::Parser on a file"
90
+ end
91
+ end
92
+