mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/specs/gi_spec.rb ADDED
@@ -0,0 +1,22 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper'
2
  )
3
+ require 'gi'
4
+
5
+
6
+ describe GI, "given a 'GI' number" do
7
+ before(:all) do
8
+ @gi_num = 836805
9
+ end
10
+ it 'can query NCBI for annotation (fails nicely w/o connection)' do
11
+ annot = GI.gi2annot([@gi_num])
12
+ if annot
13
+ annot.first.should == 'proteosome component PRE4 [Saccharomyces cerevisiae]'
14
+ else
15
+ puts "- retrieval of gi failed gracefully w/o internet connection"
16
+ end
17
+ end
18
+
19
+ end
20
+
21
+
22
+
23
+
@@ -0,0 +1,7 @@
1
+ tmp = $VERBOSE ; $VERBOSE = nil
2
+ LOAD_BIN_PATH = File.expand_path(File.dirname(__FILE__) + "#{File::SEPARATOR}..#{File::SEPARATOR}bin")
3
+ $VERBOSE = tmp
4
+
5
+ if ENV.key?("PATH")
6
+ ENV["PATH"] = LOAD_BIN_PATH + File::PATH_SEPARATOR + ENV["PATH"]
7
+ end
@@ -0,0 +1,13 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+ require 'merge_deep'
3
+
4
+ describe 'merging one level deep' do
5
+ it 'works' do
6
+ base = {1=>"X", 3=>{6=>7, 8=>9}}
7
+ another = {1=>'y', 3=>{6=>9}}
8
+ ans = base.merge_deep(another, 1)
9
+ ans.should == {1=>'y', 3=>{6=>9, 8=>9}}
10
+ end
11
+ end
12
+
13
+
@@ -0,0 +1,77 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require 'ms/gradient_program'
3
+
4
+ describe GradientProgram do
5
+ it 'can be set from a Thermo Xcal 2.X .meth file' do
6
+ data = [
7
+ [0.00, 95.0, 5.0, 0.0, 0.0, 38.0],
8
+ [1.00, 90.0, 10.0, 0.0, 0.0, 38.0],
9
+ [30.00, 85.0, 15.0, 0.0, 0.0, 38.0],
10
+ [40.00, 80.0, 20.0, 0.0, 0.0, 38.0],
11
+ [45.00, 78.0, 22.0, 0.0, 0.0, 38.0],
12
+ [50.00, 72.0, 28.0, 0.0, 0.0, 38.0],
13
+ [65.00, 60.0, 40.0, 0.0, 0.0, 38.0],
14
+ [72.00, 10.0, 90.0, 0.0, 0.0, 38.0],
15
+ [75.0, 10.0, 90.0, 0.0, 0.0, 38.0],
16
+ [81.00, 10.0, 90.0, 0.0, 0.0, 38.0],
17
+ [81.10, 95.0, 5.0, 0.0, 0.0, 38.0],
18
+ [90.00, 95.0, 5.0, 0.0, 0.0, 38.0],
19
+ ]
20
+
21
+ ms_pump_expected_tps = data.map do |ar|
22
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
23
+ end
24
+ ms_pump_expected = GradientProgram.new('MS Pump', ms_pump_expected_tps, %w(A B C D))
25
+
26
+ data = [
27
+ [0.00, 0.0, 0.0, 100.0, 0.0, 40.0],
28
+ [90.0, 0.0, 0.0, 100.0, 0.0, 40.0],
29
+ ]
30
+ sample_pump_expected_tps = data.map {|ar| GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4]) }
31
+ sample_pump_expected = GradientProgram.new('Sample Pump', sample_pump_expected_tps, %w(A B C D))
32
+
33
+ file = Tfiles + '/s01_anC1_ld020mM.meth'
34
+ File.open(file) do |fh|
35
+ gps = GradientProgram.all_from_handle(fh)
36
+ gps[0].should == ms_pump_expected
37
+ gps[1].should == sample_pump_expected
38
+ end
39
+ end
40
+
41
+ it 'can be set from a Thermo Xcal 1.X .RAW file (but missing pump_type)' do
42
+ file = Tfiles + '/opd1_020_beginning.RAW'
43
+ data = [[0.0, 0.0, 0.0, 100.0, 0.0, 200.0],
44
+ [1.0, 0.0, 0.0, 96.0, 4.0, 200.0],
45
+ [10.0, 0.0, 0.0, 96.0, 4.0, 200.0],
46
+ [11.0, 0.0, 0.0, 100.0, 0.0, 200.0],
47
+ [85.0, 0.0, 0.0, 100.0, 0.0, 200.0],]
48
+
49
+ time_points = data.map do |ar|
50
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
51
+ end
52
+ pump_type = '' ## need to get pump type...
53
+ ms_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
54
+
55
+ data = [[0.0, 95.0, 5.0, 0.0, 0.0, 200.0],
56
+ [1.0, 95.0, 5.0, 0.0, 0.0, 200.0],
57
+ [61.0, 55.0, 45.0, 0.0, 0.0, 200.0],
58
+ [62.0, 5.0, 95.0, 0.0, 0.0, 200.0],
59
+ [67.0, 5.0, 95.0, 0.0, 0.0, 200.0],
60
+ [68.0, 95.0, 5.0, 0.0, 0.0, 200.0],
61
+ [85.0, 95.0, 5.0, 0.0, 0.0, 200.0],]
62
+ time_points = data.map do |ar|
63
+ GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
64
+ end
65
+ pump_type = '' ## need to get pump type...
66
+ sample_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
67
+
68
+ # we'd like to get an older .meth file to do this on
69
+ File.open(file) do |fh|
70
+ gps = GradientProgram.all_from_handle(fh)
71
+ gps[0].should == ms_pump_expected
72
+ gps[1].should == sample_pump_expected
73
+
74
+ end
75
+ end
76
+
77
+ end
@@ -0,0 +1,455 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require 'ms/msrun'
4
+ require 'ostruct'
5
+
6
+ parsers = %w(AXML LibXML XMLParser Regexp REXML)
7
+
8
+ XMLStyleParser::Parser_precedence.replace( %w(AXML) )
9
+
10
+
11
+ describe "an msrun with basic, non-spectral information", :shared => true do
12
+ it 'knows the type and version of file' do
13
+ @run.filetype.should == @info.filetype
14
+ @run.version.should == @info.version
15
+ end
16
+
17
+ it 'knows basic run information' do
18
+ @run.scan_count.should == @info.scan_count
19
+ @run.start_time.should == @info.start_time
20
+ @run.end_time.should == @info.end_time
21
+ end
22
+
23
+ it 'has all scans' do
24
+ @run.scans.size.should == @info.scan_count
25
+ @run.scans.each_with_index do |sc,i|
26
+ sc.class.should == MS::Scan
27
+ end
28
+ end
29
+
30
+ it 'can determine scan counts for any mslevel' do
31
+ @run.scan_counts.class.should == Array
32
+ @run.scan_count(0).should == @info.scan_count0
33
+ @run.scan_count(1).should == @info.scan_count1
34
+ @run.scan_count(2).should == @info.scan_count2
35
+ end
36
+
37
+ it 'has correct first two scans and last scan' do
38
+ [0,1,-1].each do |i|
39
+ @info.scans[i].each do |k,v|
40
+ if k == :precursors
41
+ v.zip( @run.scans[i].send(k) ) do |exp, act|
42
+ act.mz.should be_close(exp.mz, 0.000001)
43
+ #if act.intensity # intensity not guaranteed to exist!
44
+ # act.intensity.should == exp.intensity
45
+ #end
46
+ end
47
+ else
48
+ @run.scans[i].send(k).should == v
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
54
+
55
+ describe "an msrun with spectrum", :shared => true do
56
+
57
+ it 'has all scans with spectrum data' do
58
+ @run.scans.size.should == @info.scan_count
59
+ @run.scans.each_with_index do |sc,i|
60
+ sc.class.should == MS::Scan
61
+ sc.spectrum.should have_mz_data
62
+ sc.spectrum.should have_intensity_data
63
+ end
64
+ end
65
+
66
+ it 'can determine start_and_end_mz' do
67
+ @run.start_and_end_mz(1).should == @info.start_and_end_mz1
68
+ @run.start_and_end_mz(2).should == @info.start_and_end_mz2
69
+ end
70
+
71
+ it "has correct prec inten for first two scans and last scan" do
72
+ [0,1,-1].each do |i|
73
+ if i == 0
74
+ # currently we do diff't things for ms_level 1 scans! is it nil or []
75
+ #@run.scans[i].precursors.should == []
76
+ #@run.scans[i].precursors.should be_nil
77
+ next
78
+ end
79
+ expected = @info.scans[i][:precursors]
80
+ @run.scans[i].precursors.zip(expected) do |act,exp|
81
+ act.mz.should be_close(exp.mz, 0.000001)
82
+ act.intensity.should == exp.intensity
83
+ end
84
+ end
85
+ end
86
+
87
+ it_should_behave_like "an msrun with basic, non-spectral information"
88
+ end
89
+
90
+ describe 'a basic scan info generator', :shared => true do
91
+
92
+ def check_table(table, answer)
93
+ answer.each do |k,v|
94
+ if v == nil
95
+ table[k].should be_nil
96
+ else
97
+ table[k].should be_close(v, 0.000001)
98
+ end
99
+ end
100
+ end
101
+
102
+ it 'generates precursor_mz_by_scan_num lookup table' do
103
+ ar = @run.precursor_mz_by_scan_num
104
+ check_table(ar, @info.num_to_prec_mz_hash)
105
+ end
106
+
107
+ it 'class method -> precursor_mz_by_scan_num (with file)' do
108
+ ar = @info.klass.precursor_mz_by_scan_num(@info.file)
109
+ check_table(ar, @info.num_to_prec_mz_hash)
110
+ end
111
+ end
112
+
113
+ MzXML_version_1_info = MyOpenStruct.new do |info|
114
+ info.file = Tfiles_l + '/yeast_gly_mzXML/000.mzXML'
115
+ info.klass = MS::MSRun
116
+ info.filetype = :mzxml
117
+ info.version = '1.0'
118
+ info.scan_count = 3748
119
+ #info.scan_counts = [3748, 937, nil] ## need to get ms2
120
+ info.start_time = 0.44
121
+ info.end_time = 5102.55
122
+ info.num_to_prec_mz_hash = {
123
+ 0 => nil,
124
+ 1 => nil,
125
+ 2 => 391.045410,
126
+ 3 => 446.009033,
127
+ 4 => 1222.033203,
128
+ 5 => nil,
129
+ 6 => 390.947449,
130
+ 3744 => 338.779114,
131
+ 3745 => nil,
132
+ 3746 => 304.136597,
133
+ 3748 => 433.564941,
134
+ }
135
+ info.scans = {}
136
+
137
+ info.scans[0] = {
138
+ :num => 1,
139
+ :ms_level => 1,
140
+ :time => 0.440,
141
+ }
142
+ info.scans[1] = {
143
+ :num => 2,
144
+ :ms_level => 2,
145
+ :time => 1.90,
146
+ :precursors => [MS::Precursor.new(:mz => 391.045410, :intensity => 6986078.0)]
147
+ }
148
+ info.scans[-1] = {
149
+ :num => 3748,
150
+ :ms_level => 2,
151
+ :time => 5102.55,
152
+ :precursors => [MS::Precursor.new(:mz => 433.564941, :intensity => 481800.0)]
153
+ }
154
+ info.scan_count0 = info.scan_count
155
+ info.scan_count1 = 937
156
+ info.scan_count2 = 2811
157
+ info.start_and_end_mz1 = [300.0, 1500.0]
158
+ info.start_and_end_mz2 = [0.0, 2000.0]
159
+ end
160
+
161
+ describe MS::MSRun, "on mzXML version 1 files (w/o spectra)" do
162
+ spec_large do
163
+ before(:all) do
164
+ @info = MzXML_version_1_info
165
+ start = Time.now
166
+ @run = @info.klass.new(@info.file, :spectra => false)
167
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
168
+ end
169
+ it_should_behave_like "an msrun with basic, non-spectral information"
170
+ it_should_behave_like 'a basic scan info generator'
171
+ end
172
+ end
173
+
174
+ describe MS::MSRun, "on mzXML version 1 files (w/spectra)" do
175
+ spec_large do
176
+ before(:all) do
177
+ @info = MzXML_version_1_info
178
+ start = Time.now
179
+ @run = @info.klass.new(@info.file)
180
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
181
+ end
182
+
183
+ it_should_behave_like "an msrun with spectrum"
184
+ it_should_behave_like 'a basic scan info generator'
185
+ end
186
+ end
187
+
188
+ MzXML_version_20_info = MyOpenStruct.new do |info|
189
+ info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
190
+ info.klass = MS::MSRun
191
+ info.filetype = :mzxml
192
+ info.version = '2.0'
193
+ info.scan_count = 3620
194
+ #info.scan_counts = ??
195
+ info.start_time = 0.13
196
+ info.end_time = 5099.69
197
+ info.num_to_prec_mz_hash = {
198
+ 0 => nil,
199
+ 1 => nil,
200
+ 2 => 390.9291992,
201
+ 3 => 1121.944824,
202
+ 4 => 1321.913574,
203
+ 3617 => nil,
204
+ 3618 => 828.2867432,
205
+ 3619 => 424.8538208,
206
+ 3620 => 357.0411987,
207
+ }
208
+ info.scans = {}
209
+ info.scans[0]= {
210
+ :num => 1,
211
+ :ms_level => 1,
212
+ :time => 0.13,
213
+ }
214
+ info.scans[1] = {
215
+ :num => 2,
216
+ :ms_level => 2,
217
+ :time => 1.49,
218
+ :precursors => [MS::Precursor.new(:mz => 390.9291992, :intensity => 8.14409e+006)]
219
+ }
220
+ info.scans[-1] = {
221
+ :num => 3620,
222
+ :ms_level => 2,
223
+ :time => 5099.69,
224
+ :precursors => [MS::Precursor.new(:mz => 357.0411987, :intensity => 643017.0)]
225
+ }
226
+ info.scan_count0 = info.scan_count
227
+ info.scan_count1 = 905
228
+ info.scan_count2 = 2715
229
+ info.start_and_end_mz1 = [300.0, 1500.0]
230
+ # that first number on start_and_end_mz2 is arbitrary as to accuracy...
231
+ # I'm not sure of the correct answer
232
+ info.start_and_end_mz2 = [110.0, 2000.0]
233
+ end
234
+
235
+ describe MS::MSRun, "on mzXML version 2.0 files (w/o spectra)" do
236
+ spec_large do
237
+ before(:all) do
238
+ @info = MzXML_version_20_info
239
+ start = Time.now
240
+ @run = @info.klass.new(@info.file, :spectra => false)
241
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
242
+ end
243
+
244
+ it_should_behave_like "an msrun with basic, non-spectral information"
245
+ it_should_behave_like 'a basic scan info generator'
246
+ end
247
+ end
248
+
249
+ describe MS::MSRun, "on mzXML version 2.0 files (w/spectra)" do
250
+ spec_large do
251
+ before(:all) do
252
+ @info = MzXML_version_20_info
253
+ start = Time.now
254
+ @run = @info.klass.new(@info.file)
255
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
256
+ end
257
+
258
+ it_should_behave_like "an msrun with spectrum"
259
+ it_should_behave_like 'a basic scan info generator'
260
+ end
261
+ end
262
+
263
+ Mzdata_105_info = MyOpenStruct.new do |info|
264
+ info.file = Tfiles_l + '/opd1_2runs_2mods/data/020.mzData.xml'
265
+ info.klass = MS::MSRun
266
+ info.filetype = :mzdata
267
+ info.version = '1.05'
268
+ info.scan_count = 3619 # this should be 3620, they drop the last scan
269
+ info.start_time = 0.13002 # minutes == 0.00216667
270
+ # This is the correct one!, but Thermo drops last scan
271
+ # info.end_time = 5099.688 #84.9948
272
+ info.end_time = 84.968500*60 # 5098.11
273
+
274
+ info.num_to_prec_mz_hash = {
275
+ 0 => nil,
276
+ 1 => nil,
277
+ 2 => 390.9291992,
278
+ 3 => 1121.944824,
279
+ 4 => 1321.913574,
280
+ 3617 => nil,
281
+ 3618 => 828.2867432,
282
+ 3619 => 424.8538208,
283
+ # 3620 => 357.0411987, Bioworks 3.3 is broken
284
+ }
285
+
286
+ info.scans = {}
287
+ info.scans[0] = {
288
+ :num => 1,
289
+ :ms_level => 1,
290
+ :time => 0.13002, # a little rounding error coming from minutes
291
+ }
292
+ info.scans[1] = {
293
+ :num => 2,
294
+ :ms_level => 2,
295
+ :time => 0.024833 * 60, # 1.48998
296
+ :precursors => [MS::Precursor.new( :mz => 390.9291992, :intensity => 8.144094e+006) ],
297
+ }
298
+ info.scans[-1] = {
299
+ :num => 3619,
300
+ :ms_level => 2,
301
+ #:time => 5099.69,
302
+ :time => 84.968500 * 60, # 5098.11
303
+
304
+ :precursors => [MS::Precursor.new( :mz => 424.853821, :intensity => 738590.0 )] # wrong
305
+ }
306
+ info.scan_count0 = info.scan_count
307
+ info.scan_count1 = 905
308
+ info.scan_count2 = 2714 # should be 2715, they dropped the last scan!
309
+ info.start_and_end_mz1 = [300.0, 1500.0]
310
+ # This is the Correct one!!!, but Thermo drops last scan
311
+ #info.start_and_end_mz2 = [112.0, 2000.0]
312
+ info.start_and_end_mz2 = [95.0, 2000.0]
313
+ end
314
+
315
+ describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/o spectra)" do
316
+ spec_large do
317
+ before(:all) do
318
+ @info = Mzdata_105_info
319
+ start = Time.now
320
+ @run = @info.klass.new(@info.file, :spectra => false)
321
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
322
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
323
+ end
324
+
325
+ it_should_behave_like "an msrun with basic, non-spectral information"
326
+ it_should_behave_like 'a basic scan info generator'
327
+
328
+ end
329
+ end
330
+
331
+ describe MS::MSRun, "on mzData version 1.05 files (Bioworks3.3) (w/spectra)" do
332
+ spec_large do
333
+ before(:all) do
334
+ @info = Mzdata_105_info
335
+ start = Time.now
336
+ @run = @info.klass.new(@info.file)
337
+ puts "- read #{File.basename(@info.file)} in #{Time.now - start} seconds" if $specdoc
338
+ puts "- [NOTE] mzData files from Thermo are missing their last scan!" if $specdoc
339
+ end
340
+
341
+ it_should_behave_like "an msrun with spectrum"
342
+ it_should_behave_like 'a basic scan info generator'
343
+
344
+ it 'gets correct precursor intensities for all scans' do
345
+ check_file = Tfiles_l + '/opd1_2runs_2mods/data/020.readw.mzXML'
346
+ prec_inten_mzs = IO.readlines(check_file).grep(/precursorMz/).map do |line|
347
+ if line =~ /Intensity="([\d\.e\+\-]+)">([\d\.e\+\-]+)</
348
+ [$1.to_f, $2.to_f]
349
+ else
350
+ abort "didn't match for some crazy reason! (probably newline issues)"
351
+ end
352
+ end
353
+
354
+ prec_mz_cnt = 0
355
+ @run.scans.each_with_index do |scan,i|
356
+ next if i % 4 == 0
357
+ (exp_int, exp_mz) = prec_inten_mzs[prec_mz_cnt]
358
+
359
+ precursor = scan.precursors.first
360
+ precursor.mz.should be_close(exp_mz, 0.00001)
361
+ precursor.intensity.should be_close(exp_int, 51)
362
+
363
+ prec_mz_cnt += 1
364
+ end
365
+ end
366
+ end
367
+ end
368
+
369
+ describe MS::MSRun, 'with small file of twenty scans' do
370
+ before(:each) do
371
+ @file = Tfiles + "/opd1/twenty_scans.mzXML"
372
+ @msrun = MS::MSRun.new(@file)
373
+ end
374
+
375
+ it 'retrieves times and spectra' do
376
+ (times, spectra) = @msrun.times_and_spectra(1)
377
+ etimes = %w(0.440000 5.150000 10.690000 16.400000 22.370000).map {|t| t.to_f }
378
+ num_peaks = [992, 814, 796, 849, 813]
379
+ tol = 0.000000001
380
+ spectra[0].mz[1].should be_close(301.430114746094, tol)
381
+ spectra[0].intensity[1].should be_close(22192.0, tol)
382
+ spectra[0].mz[-1].should be_close(1499.09912109375, tol)
383
+ spectra[0].intensity[-1].should be_close(111286.0, tol)
384
+
385
+ spectra[-1].mz[1].should be_close(301.243774414062, tol)
386
+ spectra[-1].intensity[1].should be_close(77503.0, tol)
387
+ spectra[-1].mz[-1].should be_close(1499.42016601562, tol)
388
+ spectra[-1].intensity[-1].should be_close(13.0, tol)
389
+
390
+ num_peaks.each_with_index do |n,i|
391
+ spectra[i].mz.size.should == n
392
+ end
393
+ etimes.each_with_index do |t,i|
394
+ times[i].should be_close(t, 0.00001)
395
+ end
396
+ end
397
+ end
398
+
399
+ describe MS::MSRun, 'with a small set of scans' do
400
+ it 'can add parent scans' do
401
+ vals = [
402
+ [1,1,0.13],
403
+ [2,2,0.23],
404
+ [3,2,0.33],
405
+ [4,3,0.43],
406
+ [5,3,0.53],
407
+ [6,1,0.63],
408
+ [7,2,0.73],
409
+ [8,3,0.83],
410
+ [9,2,0.93]
411
+ ]
412
+ precs = (0..(vals.size)).to_a.map do |x|
413
+ MS::Precursor.new([x,100])
414
+ end
415
+ scans = vals.zip(precs).map do |ar,prec|
416
+ scan = MS::Scan.new(ar)
417
+ scan.precursors = [prec]
418
+ scan
419
+ end
420
+ scans.size.should == vals.size
421
+ s = scans
422
+ parents = [nil,s[0],s[0],s[2],s[2],nil,s[5],s[6],s[5]]
423
+ MS::MSRun.add_parent_scan(scans)
424
+ scans.each_with_index do |scan,i|
425
+ scan.precursors.first.parent.should == parents[i]
426
+ end
427
+ end
428
+ end
429
+
430
+ =begin
431
+ ###################################################
432
+ # SHOULD IMPLEMENT BASIC INFO FOR ALL FILE TYPES
433
+ ###################################################
434
+
435
+ require 'test/unit'
436
+ require 'ms/mzxml/parser'
437
+
438
+ class MSMzXML < Test::Unit::TestCase
439
+ def initialize(arg)
440
+ super(arg)
441
+ @tfiles = File.dirname(__FILE__) + '/tfiles/'
442
+ @tscans = @tfiles + "opd1/twenty_scans.mzXML"
443
+ @big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
444
+ end
445
+
446
+ def test_basic_info
447
+ hash = MS::MzXML::Parser.new.basic_info(@tscans)
448
+ assert_equal({:scan_count=>[20, 5, 15], :start_time=>0.44, :end_time=>27.05, :start_mz=>300.0, :end_mz=>1500.0, :ms_level=>1}, hash, "basic info the same")
449
+ end
450
+
451
+ end
452
+
453
+ =end
454
+
455
+
@@ -0,0 +1,92 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require 'ms/parser'
4
+
5
+ describe "a MS::Parser on a file", :shared => true do
6
+ it 'finds filetype and version on file and handle' do
7
+ ft_version = nil
8
+ File.open(@file) do |fh|
9
+ ft_version = MS::Parser.filetype_and_version(fh)
10
+ end
11
+ ft_version.should == @filetype_version
12
+ ft_version = MS::Parser.filetype_and_version(@file)
13
+ ft_version.should == @filetype_version
14
+ end
15
+
16
+ it 'creates a sub-classed parser responding to "msrun"' do
17
+ parser = MS::Parser.new(@file, :msrun)
18
+ parser.class.to_s.should match(/^MS::Parser::/)
19
+ parser.class.to_s.should match(Regexp.new(Regexp.escape(@subclass)))
20
+ parser.respond_to?(:msrun).should be_true
21
+ end
22
+
23
+ ########################################################################
24
+ # NOTE: methods to verify parsing of information should be defined where
25
+ # that information is required.
26
+ # e.g. msrun_spec.rb will verify that msrun objects are created properly.
27
+ # this is because we don't care how we get that file, just that we get it.
28
+ # The whole process of parsing a file should be transparent to users.
29
+ ########################################################################
30
+
31
+ end
32
+
33
+ describe MS::Parser, "on a RAW file (Xcalibur 1.3 SP 1)" do
34
+ spec_large do
35
+ before(:all) do
36
+ @filetype = :raw
37
+ @version = nil
38
+ @filetype_version = [@filetype, @version]
39
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.RAW'
40
+ end
41
+
42
+ it 'finds filetype (NO version yet!) on file and handle' do
43
+ ft_version = nil
44
+ File.open(@file) do |fh|
45
+ ft_version = MS::Parser.filetype_and_version(fh)
46
+ end
47
+ ft_version.should == @filetype_version
48
+ ft_version = MS::Parser.filetype_and_version(@file)
49
+ ft_version.should == @filetype_version
50
+ end
51
+ end
52
+ end
53
+
54
+ describe MS::Parser, "on an mzXML version 1 file" do
55
+ spec_large do
56
+ before(:all) do
57
+ @filetype = :mzxml
58
+ @version = '1.0'
59
+ @filetype_version = [@filetype, @version]
60
+ @subclass = 'MS::Parser::MzXML'
61
+ @file = Tfiles_large + '/yeast_gly_mzXML/000.mzXML'
62
+ end
63
+ it_should_behave_like "a MS::Parser on a file"
64
+ end
65
+ end
66
+
67
+ describe MS::Parser, "on an mzXML version 2 file" do
68
+ spec_large do
69
+ before(:all) do
70
+ @filetype = :mzxml
71
+ @version = '2.0'
72
+ @filetype_version = [@filetype, @version]
73
+ @subclass = 'MS::Parser::MzXML'
74
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.readw.mzXML'
75
+ end
76
+ it_should_behave_like "a MS::Parser on a file"
77
+ end
78
+ end
79
+
80
+ describe MS::Parser, "on an mzData version 1.05 file" do
81
+ spec_large do
82
+ before(:all) do
83
+ @filetype = :mzdata
84
+ @version = '1.05'
85
+ @filetype_version = [@filetype, @version]
86
+ @subclass = 'MS::Parser::MzData'
87
+ @file = Tfiles_large + '/opd1_2runs_2mods/data/020.mzData.xml'
88
+ end
89
+ it_should_behave_like "a MS::Parser on a file"
90
+ end
91
+ end
92
+