mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -1,109 +1,77 @@
1
-
2
- require 'base64'
3
-
4
- module Spec; end
5
-
6
- module Spec::MzXML
7
- Potential_mzxml_converters = %w(readw.exe readw t2x)
8
-
9
- # takes PT2.7500000S and returns it as 2.700000 (no PT or S)
10
- def strip_time(time)
11
- return time[2...-1]
12
- end
13
-
14
- # first, converts backslash to forward slash in filename.
15
- # if .mzXML returns the filename
16
- # if .raw or .RAW converts the file to .mZXML and returns mzXML filename
17
- # if no recognized extension, looks for .mzXML file, then .RAW file (and
18
- # converts)
19
- # aborts if file was not able to be converted
20
- # returns nil if a file that can be converted or used was not found
21
- def self.file_to_mzxml(file)
22
- file.gsub!("\\",'/')
23
- old_file = file.dup
24
- if file =~ /\.mzXML$/
25
- return file
26
- elsif file =~ /\.RAW$/i
27
- old_file = file.dup
28
- ## t2x outputs in cwd (so go to the directory of the file!)
29
- dir = File.dirname(file)
30
- basename = File.basename(file)
31
- converter = Spec::MzXML.find_mzxml_converter
32
- Dir.chdir(dir) do
33
- if converter =~ /readw/
34
- cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
35
- else
36
- cmd = "#{converter} #{basename}"
37
- end
38
- #puts cmd
39
- #puts `#{cmd}`
40
- reply = `#{cmd}`
41
- puts reply if $VERBOSE
42
- end
43
- file.sub!(/\.RAW$/i, '.mzXML')
44
- unless File.exist? file
45
- abort "Couldn't convert #{old_file} to #{file}"
46
- end
47
- return file
48
- else
49
- if File.exist?( file + '.mzXML' )
50
- return file_to_mzxml(file + '.mzXML')
51
- elsif File.exist?( file + '.RAW' )
52
- return file_to_mzxml(file + '.RAW')
53
- elsif File.exist?( file + '.raw' )
54
- return file_to_mzxml(file + '.raw')
55
- else
56
- return nil
57
- end
58
- end
59
-
60
- end
61
-
62
-
63
-
64
- # takes a base64 peaks string and returns an array of [m/z,intens] doublets
65
- # mzXML as network ordered
66
- def base64_peaks_to_pairs(string, precision=32)
67
- data = base64_peaks_to_array(string, precision)
68
- ndata = []
69
- data.each_with_index do |dat,ind|
70
- if (ind % 2) == 0 # even
71
- arr = Array.new(2)
72
- arr[0] = dat
73
- ndata.push( arr )
74
- else
75
- ndata.last[1] = dat
76
- end
77
- end
78
- ndata
79
- end
80
-
81
- # takes a base64 peaks string and returns an array of alternating m/z and
82
- # intensity mzXML as network ordered
83
- def base64_peaks_to_array(string, precision=32)
84
- b64d = Base64.decode64(string)
85
- if precision == 32
86
- unpack_code = "g*"
87
- elsif precision == 64
88
- unpack_code = "G*"
89
- end
90
- b64d.unpack(unpack_code)
91
- end
92
-
93
- # Searchs each path element and returns the first one it finds
94
- # returns nil if none found
95
- def self.find_mzxml_converter
96
- ENV['PATH'].split(/[:;]/).each do |path|
97
- Dir.chdir(path) do
98
- Potential_mzxml_converters.each do |pc|
99
- if File.exist? pc
100
- return File.join(path, pc)
101
- end
102
- end
103
- end
104
- end
105
- nil
106
- end
107
-
108
-
109
- end
1
+
2
module MS ; end
module MS::Converter ; end

# Utilities for resolving a user-supplied filename to an existing mzXML file,
# shelling out to an external RAW -> mzXML converter when necessary.
module MS::Converter::MzXML
  # Executable names looked for (in this order) inside each PATH directory.
  Potential_mzxml_converters = %w(readw.exe readw t2x)

  # takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
  #def strip_time(time)
  #  return time[2...-1]
  #end

  # Resolves +file+ to an mzXML filename.
  #
  # * backslashes in +file+ are first converted to forward slashes
  # * if it ends in .mzXML the filename is returned as is
  # * if it ends in .raw/.RAW (any case) the file is converted with the first
  #   converter found in PATH and the .mzXML filename is returned
  # * with any other (or no) extension, looks for file + '.mzXML', then
  #   file + '.RAW', then file + '.raw' and resolves whichever exists first
  #
  # NOTE: +file+ is modified in place (slashes, and the .RAW -> .mzXML
  # extension swap), exactly as before; pass a dup if that matters.
  #
  # Aborts if the .mzXML file does not exist after the conversion attempt.
  # Returns nil if nothing usable or convertible was found.
  def self.file_to_mzxml(file)
    file.gsub!("\\",'/')
    return file if file =~ /\.mzXML$/

    if file =~ /\.RAW$/i
      old_file = file.dup
      ## t2x outputs in cwd (so go to the directory of the file!)
      dir = File.dirname(file)
      basename = File.basename(file)
      # was MS::MzXML.find_mzxml_converter, a constant that does not exist
      # under the MS::Converter namespace (NameError on every .RAW file)
      converter = find_mzxml_converter
      if converter
        Dir.chdir(dir) do
          if converter =~ /readw/
            cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
          else
            cmd = "#{converter} #{basename}"
          end
          # NOTE(review): the command goes through the shell unescaped, so
          # filenames containing spaces or shell metacharacters will break.
          reply = `#{cmd}`
          puts reply if $VERBOSE
        end
      elsif $VERBOSE
        warn "no mzXML converter (#{Potential_mzxml_converters.join(', ')}) found in PATH"
      end
      file.sub!(/\.RAW$/i, '.mzXML')
      unless File.exist? file
        abort "Couldn't convert #{old_file} to #{file}"
      end
      return file
    end

    # no recognized extension: try the known ones in order of preference
    %w(.mzXML .RAW .raw).each do |ext|
      candidate = file + ext
      return file_to_mzxml(candidate) if File.exist?(candidate)
    end
    nil
  end

  # Searches each PATH directory for one of Potential_mzxml_converters and
  # returns the full path of the first one found (nil if none is found).
  #
  # PATH is split on the platform's separator (File::PATH_SEPARATOR) so that
  # Windows entries such as "C:\bin" survive intact, and directories are
  # probed with File.exist? rather than Dir.chdir so that a PATH entry that
  # no longer exists is simply skipped instead of raising.
  def self.find_mzxml_converter
    (ENV['PATH'] || '').split(File::PATH_SEPARATOR).each do |path|
      next if path.empty?
      Potential_mzxml_converters.each do |pc|
        candidate = File.join(path, pc)
        return candidate if File.exist?(candidate)
      end
    end
    nil
  end

end
77
+
@@ -0,0 +1,171 @@
1
+ require 'array_class'
2
+
3
# This is modeled after the Thermo gradient.
#
# A GradientProgram is a pump type, a list of solvents and a list of
# TimePoints whose percentages array is parallel to the solvents array.
class GradientProgram
  attr_accessor :time_points
  attr_accessor :pump_type
  # array of solvents parallel to TimePoint percentages array
  attr_accessor :solvents

  def initialize(pump_type, time_points=[], solvents=[])
    @pump_type = pump_type
    @time_points = time_points
    @solvents = solvents
  end

  # equal when class, pump type, solvents and time points are all equal
  def ==(other)
    self.class == other.class && @pump_type == other.pump_type && @solvents == other.solvents && @time_points == other.time_points
  end

  # gets the first gradient program encountered in the filehandle (reading
  # from the handle's current position) and returns it as a GradientProgram,
  # or nil if no gradient table is found before the end of the handle.
  #
  # The text is scanned as two-byte characters whose second byte is NUL
  # (hence all the \000 in the patterns).
  # NOTE(review): presumably the handle should be opened in binary mode so
  # that non-ASCII bytes do not upset the regexp matching -- confirm.
  def self.get_gradient_program(fh)
    thermo_newline = "\n\000"
    find_gtable_regexp = /[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000/
    xcal_2x_regexp = /[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000t\000a\000b\000l\000e\000:\000/
    xcal_1x_regexp = /[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000P\000r\000o\000g\000r\000a\000m\000:\000/
    # accepts the same [Gg] as the patterns above (it used to demand a
    # lowercase g and blew up on a nil match otherwise)
    grab_pump_type_regexp = /(.*) .[Gg].r.a.d.i.e.n.t. .t.a.b.l.e/

    xcal_version = nil
    pump_type = ''
    fh.each(thermo_newline) do |line|
      # first identify the line, then decide which flavor of table follows
      next unless line =~ find_gtable_regexp
      if line =~ xcal_1x_regexp
        # pump type stays '' (have to look way back in file for this)
        xcal_version = '1.0'
        break
      elsif line =~ xcal_2x_regexp
        match = grab_pump_type_regexp.match(line)
        pump_type = read_thermo_string(match.captures[0]) if match
        xcal_version = '2.0'
        break
      end
    end
    return nil unless xcal_version

    fh.gets(thermo_newline) # nothing
    fh.gets(thermo_newline) # table headers (unused)
    time_points =
      if xcal_version == '2.0'
        # rows look like:
        # 0   0.00  95.0  5.0   0.0   0.0   38.0   x
        # 1   1.00  90.0  10.0  0.0   0.0   38.0   o
        # and the table ends at the first empty line
        read_time_points(fh, thermo_newline, xcal_version) {|line| line == thermo_newline }
      else
        # the 1.X table ends at a line beginning with four NUL bytes
        read_time_points(fh, thermo_newline, xcal_version) {|line| line =~ /^\000\000\000\000/ }
      end
    GradientProgram.new(pump_type, time_points, %w(A B C D))
  end

  # reads table rows from fh until the block returns true for a line or the
  # handle is exhausted (gets returns nil at EOF; the old loops passed that
  # nil on to the row parser and raised NoMethodError).
  def self.read_time_points(fh, thermo_newline, xcal_version)
    time_points = []
    while (line = fh.gets(thermo_newline))
      break if yield(line)
      pieces = table_row_to_pieces(line, xcal_version)
      time_points << TimePoint.new(pieces[1].to_f, pieces[6].to_f, pieces[2,4].map {|x| x.to_f })
    end
    time_points
  end
  private_class_method :read_time_points

  # returns the elements of a gradient table row properly cast
  # NOTE: Xcal 2.X starts index with 0, 1.X starts with 1
  # (and this is how it will be returned!)
  # NOTE: Xcal 1.X will be shorter by one (doesn't have the o/x string!)
  # NOTE: xcal_version is compared as a String (so '10.0' sorts before '2.0')
  # [(Int) index, time (Float), %A (Float), %B (Float), %C (Float), %D (Float),
  # FlowRate (Float), o/x (String)]
  def self.table_row_to_pieces(line,xcal_version='2.0')
    string = read_thermo_string(line)
    if xcal_version >= '2.0'
      # at first, I thought you could just split on spaces, but the table is
      # designed to have a certain number of chars per column padded with
      # spaces.  This is the way to do it.
      index = string[0,4].to_i
      (tm, a, b, c, d) = (0...5).to_a.map do |x|
        string[(x*6)+4,6].rstrip.to_f
      end
      fr = string[34,7].rstrip.to_f
      ox = string[41,4].rstrip
      [index, tm, a, b, c, d, fr, ox]
    else
      # 1.X columns are right justified: index, time, flow rate, then A-D
      index = string[0,5].lstrip.to_i
      tm = string[5,13].lstrip.to_f
      fr = string[18,16].lstrip.to_f
      (a,b,c,d) = (0..3).to_a.map do |x|
        string[(x*8)+34, 8].lstrip.to_f
      end
      [index, tm, a, b, c, d, fr]
    end
  end

  # takes a filehandle
  # returns an array of gradient programs from a thermo filehandle.
  # Acceptable file types include a .meth file and a .raw file
  def self.all_from_handle(fh)
    # 0005340: 3000 2e00 3000 3000 0a00 0a00 5300 6100 0...0.0.....S.a.
    # 0005350: 6d00 7000 6c00 6500 2000 5000 7500 6d00 m.p.l.e. .P.u.m.
    # 0005360: 7000 2000 6700 7200 6100 6400 6900 6500 p. .g.r.a.d.i.e.
    # 0005370: 6e00 7400 2000 7400 6100 6200 6c00 6500 n.t. .t.a.b.l.e.
    # 0005380: 3a00 0a00 0a00 4e00 6f00 2e00 2000 5400 :.....N.o... .T.
    # 0005390: 6900 6d00 6500 2000 2000 4100 2500 2000 i.m.e. . .A.%. .
    # 00053a0: 2000 2000 2000 4200 2500 2000 2000 2000 . . .B.%. . . .
    # 00053b0: 2000 4300 2500 2000 2000 2000 2000 4400 .C.%. . . . .D.
    # 00053c0: 2500 2000 2000 2000 2000 b500 6c00 2f00 %. . . . ...l./.
    # 00053d0: 6d00 6900 6e00 2000 0a00 3000 2000 2000 m.i.n. ...0. . .
    # 00053e0: 2000 3000 2e00 3000 3000 2000 2000 3000 .0...0.0. . .0.
    # 00053f0: 2e00 3000 2000 2000 2000 3000 2e00 3000 ..0. . . .0...0.
    # 0005400: 2000 2000 2000 3100 3000 3000 2e00 3000 . . .1.0.0...0.
    programs = []
    # each call resumes where the previous one stopped reading
    while (gp = get_gradient_program(fh))
      programs << gp
    end
    programs
  end

  # keeps every other character of string (positions 0, 2, 4, ...), which
  # drops the NUL that follows each character in the thermo text
  def self.read_thermo_string(string)
    chars = []
    (0...string.size).step(2) do |i|
      chars << string[i,1]
    end
    chars.join
  end

  # keeps the first two characters of every group of four, joins them and
  # packs the result as a hex string ('H*')
  def self.read_thermo_string_as_hex(string)
    chars = []
    (0...string.size).step(4) do |i|
      chars << string[i,2]
    end
    [chars.join].pack('H*')
  end

end

# One row of a gradient table.
class GradientProgram::TimePoint
  # time in minutes
  attr_accessor :time
  # flow_rate in ul/min
  attr_accessor :flow_rate
  # percentages
  attr_accessor :percentages

  def initialize(time=nil, flow_rate=nil, percentages=[])
    @time = time
    @flow_rate = flow_rate
    @percentages = percentages
  end

  # equal when class, time, flow rate and percentages are all equal
  def ==(other)
    self.class == other.class && @time == other.time && @flow_rate == other.flow_rate && @percentages == other.percentages
  end
end
170
+
171
+
data/lib/ms/msrun.rb ADDED
@@ -0,0 +1,209 @@
1
+
2
+ require 'ms/scan'
3
+ require 'ms/parser'
4
+ require 'ms/msrun_index'
5
+ require 'ms/converter/mzxml'
6
+
7
+ #require 'ms/parser/mzxml'
8
+ #require 'ms/parser/mzdata'
9
+
10
module MS; end

# One mass spec run: its scans plus some summary information.  When given a
# file, the object is filled in by MS::Parser (which receives this object as
# opts[:msrun]).
class MS::MSRun

  attr_accessor :start_time, :end_time
  attr_accessor :scans
  # (just for reference) the type of file this is (as symbol)
  attr_accessor :filetype
  # (just for reference) the version string of this type of file
  attr_accessor :version
  # the total number of scans
  attr_writer :scan_count

  # should be able to read basic information from a variety of files
  # OPTIONS:
  #     :spectra => *true|false   # whether to parse out spectra
  # [note: precursor intensities not guaranteed to exist unless :spectra == true]
  # opts is not modified (a copy with :msrun => self is passed to the parser)
  def initialize(file=nil, opts={})
    myopts = opts.dup ; myopts[:msrun] = self
    if file
      filetype_and_version = MS::Parser.filetype_and_version(file)
      MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
      (@filetype, @version) = filetype_and_version
    end
  end

  # returns an array whose indices give the number of scans at each ms level:
  # [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2, ...
  # (levels with no scans below the highest level seen are nil)
  def scan_counts
    ar = [0]
    scans.each do |sc|
      level = sc.ms_level
      ar[level] = (ar[level] || 0) + 1
      ar[0] += 1
    end
    ar
  end

  # mslevel == 0 returns the stored total (as set through scan_count=);
  # any other level is counted from the scans
  def scan_count(mslevel=0)
    if mslevel == 0
      @scan_count
    else
      scans.select {|sc| sc.ms_level == mslevel }.size
    end
  end

  # returns [start_mz, end_mz] for the given ms level, or nil if there are no
  # scans at that level (or no m/z values to look at).
  #
  # for level 1, if the first level 1 scan has start_mz/end_mz attributes
  # they are returned as is (we expect ms1 scans to share one range).
  # Otherwise: if the first scan of the level has start_mz/end_mz, takes the
  # min start_mz and max end_mz over all scans of the level; if it does not,
  # goes through every spectrum taking the min first m/z and max last m/z.
  # In both of these cases returns [start_mz (rounded down to nearest int),
  # end_mz (rounded up to nearest int)]
  def start_and_end_mz(mslevel=1)
    level_scans = scans.select {|sc| sc.ms_level == mslevel }
    first_scan = level_scans.first
    return nil unless first_scan

    # the first scan of the level decides whether we trust the attributes
    # (this flag used to be left false, which made the attribute branch dead
    # code and sent everything through the spectra)
    have_start_end_mz = first_scan.start_mz && first_scan.end_mz

    # special case for mslevel 1 (where we expect scans to be same length)
    if mslevel == 1 && have_start_end_mz
      return [first_scan.start_mz, first_scan.end_mz]
    end

    lows = []
    highs = []
    level_scans.each do |sc|
      if have_start_end_mz && sc.start_mz && sc.end_mz
        lows << sc.start_mz
        highs << sc.end_mz
      else
        # no attributes on this scan: find by brute force.  The low end is
        # the FIRST m/z of the spectrum (it used to compare mz.last here)
        mz = sc.spectrum.mz
        lows << mz.first
        highs << mz.last
      end
    end
    lo_mz = lows.compact.min
    hi_mz = highs.compact.max
    return nil unless lo_mz && hi_mz
    [lo_mz.floor, hi_mz.ceil]
  end

  # returns an array of precursor mz by scan number
  # returns only the m/z of the FIRST precursor if multiple
  # (nil for scans without a precursor)
  def precursor_mz_by_scan_num
    ar = Array.new(@scans.size + 1)
    @scans.each do |scan|
      prec = scan.precursors.first
      ar[scan.num] = prec ? prec.mz : nil
    end
    ar
  end

  # returns an array of times and parallel array of spectra objects.
  # ms_level = 0  then all spectra and times
  # ms_level = 1  then all spectra of ms_level 1
  def times_and_spectra(ms_level=0)
    chosen = (ms_level == 0) ? @scans : @scans.select {|scan| scan.ms_level == ms_level }
    [chosen.map {|scan| scan.time }, chosen.map {|scan| scan.spectrum }]
  end

  # same as the instance method (creates an object without spectrum and calls
  # instance method of the same name)
  def self.precursor_mz_by_scan_num(file)
    self.new(file, :spectra => false).precursor_mz_by_scan_num
  end

  # walks the scans in order and, for every scan above ms level 1, sets the
  # parent (index 2) of each of its precursors to the most recent scan of a
  # lower level.  only adds the parent if one is not already present!
  # with add_intensities, the precursor intensity (index 1) is looked up in
  # the parent's spectrum at the precursor m/z (index 0); precursors that end
  # up without a parent are left alone.
  # returns scans
  def self.add_parent_scan(scans, add_intensities=false)
    return scans if scans.empty?
    prev_scan = nil
    parent_stack = [nil]
    ## we want to set the level to be the first mslevel we come to
    prev_level = scans.first.ms_level
    scans.each do |scan|
      level = scan.ms_level
      if prev_level < level
        # going deeper: the previous scan is the parent of what follows
        parent_stack.unshift prev_scan
      elsif prev_level > level
        # coming back up: drop one parent per level climbed
        (prev_level - level).times { parent_stack.shift }
      end
      if level > 1
        scan.precursors.each do |precursor|
          precursor[2] = parent_stack.first unless precursor[2]
          if add_intensities && precursor[2]
            precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
          end
        end
      end
      prev_level = level
      prev_scan = scan
    end
  end

end
207
+
208
+
209
+
@@ -1,11 +1,7 @@
1
+ require 'ms/scan'
2
+ require 'ms/parser'
1
3
 
2
- require 'spec/scan'
3
- require 'spec/mzxml/parser'
4
- require 'spec/mzdata/parser'
5
-
6
- module Spec; end
7
-
8
- class Spec::MSRunIndex
4
+ class MS::MSRunIndex
9
5
  # basename_noext is the base name of the file (with NO extensions)
10
6
  attr_accessor :scans_by_num
11
7
  attr_reader :basename_noext
@@ -32,7 +28,7 @@ class Spec::MSRunIndex
32
28
  # index_file has one row for each scan:
33
29
  # ms_level scan_num time [prec_mz prec_inten]
34
30
  # also consider getting this data directly from the mzXML file
35
- # via the Spec::MzXML::Parser.get_msrun_index command
31
+ # via the MS::MzXML::Parser.get_msrun_index command
36
32
  def set_from_index_file(index_file)
37
33
  self.basename_noext = index_file
38
34
  @scans_by_num = []
@@ -41,7 +37,7 @@ class Spec::MSRunIndex
41
37
  next if line !~ /\d/ || line =~ /^#/
42
38
  line.chomp!
43
39
  arr = line.split(" ")
44
- scan = Spec::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
40
+ scan = MS::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
45
41
  if scan.ms_level > 1
46
42
  scan.prec_mz = arr[3].to_f
47
43
  scan.prec_inten = arr[4].to_f
@@ -49,7 +45,7 @@ class Spec::MSRunIndex
49
45
  @scans_by_num[scan.num] = scan
50
46
  end
51
47
  end
52
- Spec::Scan.add_parent_scan(@scans_by_num)
48
+ MS::Scan.add_parent_scan(@scans_by_num)
53
49
  end
54
50
 
55
51
  # Takes a .mzXML file or .timeIndex file (currently)
@@ -73,7 +69,7 @@ class Spec::MSRunIndex
73
69
  # returns a new
74
70
  def set_from_mzxml(file)
75
71
  self.basename_noext = file
76
- @scans_by_num = Spec::MzXML::Parser.new.scans_by_num(file)
72
+ @scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
77
73
  end
78
74
 
79
75
  # writes the index to filename
@@ -109,33 +105,4 @@ class Spec::MSRunIndex
109
105
  end
110
106
 
111
107
 
112
- class Spec::MSRun
113
-
114
- # scan_count is an array [0] is all the scans, [1] is mslevel 1, [2] is mslevel 2, etc
115
- attr_accessor :scan_count, :start_time, :end_time, :start_mz, :end_mz
116
-
117
- # returns an array indexed by scan number where the precursor mz is recorded
118
- # for each fragment (ms2) ion
119
- # The precursor mz will be a String!
120
- def self.precursor_mz_by_scan(file)
121
- extname = File.extname(file)
122
- case extname
123
- when '.mzXML' || '.timeIndex'
124
- klass = Spec::MzXML::Parser
125
- when '.xml'
126
- klass = Spec::MzData::Parser
127
- when '' # they want us to figure out the right extension
128
- if File.exist? file + '.xml'
129
- klass = Spec::MzData::Parser
130
- else
131
- # This will cover .timeIndex, .mzXML and .RAW
132
- klass = Spec::MzXML::Parser
133
- end
134
- else
135
- abort "files of extension #{extname} are not currently supported"
136
- end
137
- klass.new.precursor_mz_by_scan(file)
138
- end
139
-
140
- end
141
108
 
@@ -0,0 +1,12 @@
1
+ require 'ms/parser/mzdata/dom'
2
+
3
+ class MS::Parser::MzData::AXML < MS::Parser::MzData::DOM # mzData parser that inherits everything from the DOM parser and loads its document with the top-level AXML library
4
+ def get_root_node_from_file(file) # +file+ is passed straight through; presumably the hook the DOM parser calls to obtain its root node -- confirm in ms/parser/mzdata/dom
5
+ ::AXML.parse_file(file) # leading :: forces lookup of the top-level AXML constant; the method returns whatever parse_file returns
6
+ end
7
+ end
8
+
9
+
10
+
11
+
12
+