mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -1,109 +1,77 @@
1
-
2
- require 'base64'
3
-
4
- module Spec; end
5
-
6
- module Spec::MzXML
7
- Potential_mzxml_converters = %w(readw.exe readw t2x)
8
-
9
- # takes PT2.7500000S and returns it as 2.700000 (no PT or S)
10
- def strip_time(time)
11
- return time[2...-1]
12
- end
13
-
14
- # first, converts backslash to forward slash in filename.
15
- # if .mzXML returns the filename
16
- # if .raw or .RAW converts the file to .mZXML and returns mzXML filename
17
- # if no recognized extension, looks for .mzXML file, then .RAW file (and
18
- # converts)
19
- # aborts if file was not able to be converted
20
- # returns nil if a file that can be converted or used was not found
21
- def self.file_to_mzxml(file)
22
- file.gsub!("\\",'/')
23
- old_file = file.dup
24
- if file =~ /\.mzXML$/
25
- return file
26
- elsif file =~ /\.RAW$/i
27
- old_file = file.dup
28
- ## t2x outputs in cwd (so go to the directory of the file!)
29
- dir = File.dirname(file)
30
- basename = File.basename(file)
31
- converter = Spec::MzXML.find_mzxml_converter
32
- Dir.chdir(dir) do
33
- if converter =~ /readw/
34
- cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
35
- else
36
- cmd = "#{converter} #{basename}"
37
- end
38
- #puts cmd
39
- #puts `#{cmd}`
40
- reply = `#{cmd}`
41
- puts reply if $VERBOSE
42
- end
43
- file.sub!(/\.RAW$/i, '.mzXML')
44
- unless File.exist? file
45
- abort "Couldn't convert #{old_file} to #{file}"
46
- end
47
- return file
48
- else
49
- if File.exist?( file + '.mzXML' )
50
- return file_to_mzxml(file + '.mzXML')
51
- elsif File.exist?( file + '.RAW' )
52
- return file_to_mzxml(file + '.RAW')
53
- elsif File.exist?( file + '.raw' )
54
- return file_to_mzxml(file + '.raw')
55
- else
56
- return nil
57
- end
58
- end
59
-
60
- end
61
-
62
-
63
-
64
- # takes a base64 peaks string and returns an array of [m/z,intens] doublets
65
- # mzXML as network ordered
66
- def base64_peaks_to_pairs(string, precision=32)
67
- data = base64_peaks_to_array(string, precision)
68
- ndata = []
69
- data.each_with_index do |dat,ind|
70
- if (ind % 2) == 0 # even
71
- arr = Array.new(2)
72
- arr[0] = dat
73
- ndata.push( arr )
74
- else
75
- ndata.last[1] = dat
76
- end
77
- end
78
- ndata
79
- end
80
-
81
- # takes a base64 peaks string and returns an array of alternating m/z and
82
- # intensity mzXML as network ordered
83
- def base64_peaks_to_array(string, precision=32)
84
- b64d = Base64.decode64(string)
85
- if precision == 32
86
- unpack_code = "g*"
87
- elsif precision == 64
88
- unpack_code = "G*"
89
- end
90
- b64d.unpack(unpack_code)
91
- end
92
-
93
- # Searchs each path element and returns the first one it finds
94
- # returns nil if none found
95
- def self.find_mzxml_converter
96
- ENV['PATH'].split(/[:;]/).each do |path|
97
- Dir.chdir(path) do
98
- Potential_mzxml_converters.each do |pc|
99
- if File.exist? pc
100
- return File.join(path, pc)
101
- end
102
- end
103
- end
104
- end
105
- nil
106
- end
107
-
108
-
109
- end
1
+
2
module MS ; end
module MS::Converter ; end

# Helpers for obtaining an mzXML file, converting a Thermo .RAW file with an
# external command-line converter (ReAdW or t2x) when necessary.
module MS::Converter::MzXML
  # Executable names searched for on PATH, in order of preference.
  Potential_mzxml_converters = %w(readw.exe readw t2x)

  # takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
  #def strip_time(time)
  #  return time[2...-1]
  #end

  # Resolves +file+ to the name of an mzXML file, converting if needed.
  #
  # * backslashes in the name are first converted to forward slashes
  # * if the name ends in .mzXML it is returned (existence is not checked)
  # * if it ends in .raw/.RAW (any case) the file is converted and the
  #   resulting .mzXML name is returned
  # * with any other name, looks for file + '.mzXML', then file + '.RAW',
  #   then file + '.raw' (converting the latter two)
  #
  # The argument is never modified (frozen strings are accepted); the
  # returned value is always a new String.
  #
  # Aborts (SystemExit) if no converter is found on PATH or the converter
  # did not produce the expected file.
  # Returns nil if no file that can be used or converted was found.
  def self.file_to_mzxml(file)
    path = file.gsub("\\", '/')
    return path if path =~ /\.mzXML$/
    return convert_raw_to_mzxml(path) if path =~ /\.RAW$/i

    %w(.mzXML .RAW .raw).each do |ext|
      candidate = path + ext
      return file_to_mzxml(candidate) if File.exist?(candidate)
    end
    nil
  end

  # Searches each PATH directory, in order, for one of
  # Potential_mzxml_converters and returns the joined path of the first one
  # found. Returns nil if none is found.
  #
  # PATH entries that are empty or do not exist are skipped.
  def self.find_mzxml_converter
    ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |dir|
      next if dir.empty?
      Potential_mzxml_converters.each do |name|
        candidate = File.join(dir, name)
        return candidate if File.file?(candidate)
      end
    end
    nil
  end

  # Runs the external converter on +raw_file+ (a path ending in .raw/.RAW)
  # and returns the name of the resulting .mzXML file; aborts on failure.
  def self.convert_raw_to_mzxml(raw_file)
    converter = find_mzxml_converter
    unless converter
      abort "Couldn't convert #{raw_file}: no mzXML converter (#{Potential_mzxml_converters.join(', ')}) found in PATH"
    end
    # we chdir below, so a converter found through a relative PATH entry
    # must be made absolute first
    converter = File.expand_path(converter)
    basename = File.basename(raw_file)
    mzxml_basename = basename.sub(/\.RAW$/i, '.mzXML')
    command =
      if File.basename(converter) =~ /readw/
        [converter, basename, 'c', mzxml_basename]
      else
        [converter, basename]
      end
    ## t2x outputs in cwd (so go to the directory of the file!)
    Dir.chdir(File.dirname(raw_file)) do
      # argument-array form: no shell is involved, so spaces or shell
      # metacharacters in the file name are passed through untouched
      reply = IO.popen(command) { |io| io.read }
      puts reply if $VERBOSE
    end
    mzxml_file = raw_file.sub(/\.RAW$/i, '.mzXML')
    abort "Couldn't convert #{raw_file} to #{mzxml_file}" unless File.exist?(mzxml_file)
    mzxml_file
  end
  private_class_method :convert_raw_to_mzxml

end
77
+
@@ -0,0 +1,171 @@
1
+ require 'array_class'
2
+
3
# An LC pump gradient program, modeled after the Thermo gradient table found
# in Xcalibur .meth and .raw files.
#
# The text in those files is stored with two bytes per character (the
# character followed by a NUL byte), so the parsing methods work on the raw
# byte stream.  NOTE(review): filehandles should presumably be opened in
# binary mode ('rb') so that reads are byte-oriented -- confirm with callers.
class GradientProgram
  # line terminator of the two-bytes-per-character text
  THERMO_NEWLINE = "\n\000".freeze
  # "[Gg]radient table:" heading (Xcalibur 2.x)
  XCAL_2X_REGEXP = /[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000t\000a\000b\000l\000e\000:\000/
  # "[Gg]radient Program:" heading (Xcalibur 1.x)
  XCAL_1X_REGEXP = /[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000P\000r\000o\000g\000r\000a\000m\000:\000/
  # captures the (still two-byte) text preceding " gradient table"
  PUMP_TYPE_REGEXP = /(.*) .[Gg].r.a.d.i.e.n.t. .t.a.b.l.e/
  # a run of NUL bytes follows the last row of a 1.x table
  END_OF_1X_TABLE_REGEXP = /^\000\000\000\000/

  # array of TimePoint objects
  attr_accessor :time_points
  # String naming the pump (empty for Xcalibur 1.x tables)
  attr_accessor :pump_type
  # array of solvents parallel to TimePoint percentages array
  attr_accessor :solvents

  def initialize(pump_type, time_points=[], solvents=[])
    @pump_type = pump_type
    @time_points = time_points
    @solvents = solvents
  end

  # true if other is of the same class with equal pump type, solvents and
  # time points
  def ==(other)
    self.class == other.class &&
      @pump_type == other.pump_type &&
      @solvents == other.solvents &&
      @time_points == other.time_points
  end

  # Reads forward in the filehandle and returns the first gradient program
  # encountered, or nil if the handle is exhausted without finding one.
  # The handle is left positioned after the table, so the method can be
  # called repeatedly (see all_from_handle).  A table truncated by end of
  # file yields the rows read so far.
  def self.get_gradient_program(fh)
    xcal_version = nil
    pump_type = ''
    fh.each(THERMO_NEWLINE) do |line|
      if line =~ XCAL_1X_REGEXP
        # pump type is not on this line (have to look way back in file)
        xcal_version = '1.0'
        break
      elsif line =~ XCAL_2X_REGEXP
        md = PUMP_TYPE_REGEXP.match(line)
        pump_type = read_thermo_string(md[1]) if md
        xcal_version = '2.0'
        break
      end
    end
    return nil unless xcal_version

    fh.gets(THERMO_NEWLINE) # blank line
    fh.gets(THERMO_NEWLINE) # table headers (unused)
    time_points = []
    while (line = fh.gets(THERMO_NEWLINE)) # nil at EOF ends the table
      break if end_of_table?(line, xcal_version)
      # 0   0.00  95.0  5.0   0.0   0.0   38.0   x
      # 1   1.00  90.0  10.0  0.0   0.0   38.0   o
      pieces = table_row_to_pieces(line, xcal_version)
      time_points << TimePoint.new(pieces[1].to_f, pieces[6].to_f, pieces[2, 4].map { |x| x.to_f })
    end
    new(pump_type, time_points, %w(A B C D))
  end

  # returns the elements of a gradient table row properly cast
  # NOTE: Xcal 2.X starts index with 0, 1.X starts with 1
  # (and this is how it will be returned!)
  # NOTE: Xcal 1.X will be shorter by one (doesn't have the o/x string!)
  # [(Int) index, time (Float), %A (Float), %B (Float), %C (Float), %D (Float),
  # FlowRate (Float), o/x (String)]
  #
  # xcal_version is compared numerically (so '10.0' is treated as >= '2.0').
  # Columns missing from a short row are returned as 0 / 0.0 / ''.
  def self.table_row_to_pieces(line, xcal_version='2.0')
    string = read_thermo_string(line)
    if xcal_version.to_s.to_f >= 2.0
      # the table has a fixed number of chars per column, padded with
      # spaces, so it must be sliced by position rather than split on spaces
      index = fixed_field(string, 0, 4).to_i
      (tm, a, b, c, d) = (0...5).map do |col|
        fixed_field(string, (col * 6) + 4, 6).rstrip.to_f
      end
      fr = fixed_field(string, 34, 7).rstrip.to_f
      ox = fixed_field(string, 41, 4).rstrip
      [index, tm, a, b, c, d, fr, ox]
    else
      index = fixed_field(string, 0, 5).lstrip.to_i
      tm = fixed_field(string, 5, 13).lstrip.to_f
      fr = fixed_field(string, 18, 16).lstrip.to_f
      (a, b, c, d) = (0..3).map do |col|
        fixed_field(string, (col * 8) + 34, 8).lstrip.to_f
      end
      [index, tm, a, b, c, d, fr]
    end
  end

  # takes a filehandle
  # returns an array of all gradient programs found from the current
  # position to the end of the handle (empty array if none).
  # Acceptable file types include a .meth file and a .raw file
  def self.all_from_handle(fh)
    programs = []
    while (gp = get_gradient_program(fh))
      programs << gp
    end
    programs
  end

  # keeps every other character of string (positions 0, 2, 4, ...), i.e.
  # drops the NUL that follows each character of two-byte thermo text
  def self.read_thermo_string(string)
    (0...string.size).step(2).map { |i| string[i, 1] }.join
  end

  # keeps the first two of every four characters of string and packs the
  # result as hex digits ('H*')
  def self.read_thermo_string_as_hex(string)
    hex = (0...string.size).step(4).map { |i| string[i, 2] }.join
    [hex].pack('H*')
  end

  # true if line marks the end of a gradient table of the given version
  def self.end_of_table?(line, xcal_version)
    if xcal_version == '1.0'
      !(line !~ END_OF_1X_TABLE_REGEXP)
    else
      line == THERMO_NEWLINE
    end
  end
  private_class_method :end_of_table?

  # returns string[start, len], or '' when the row is too short
  def self.fixed_field(string, start, len)
    string[start, len].to_s
  end
  private_class_method :fixed_field

end

# One row of a gradient table.
class GradientProgram::TimePoint
  # time in minutes
  attr_accessor :time
  # flow_rate in ul/min
  attr_accessor :flow_rate
  # percentages
  attr_accessor :percentages

  def initialize(time=nil, flow_rate=nil, percentages=[])
    @time = time
    @flow_rate = flow_rate
    @percentages = percentages
  end

  # true if other is of the same class with equal time, flow rate and
  # percentages
  def ==(other)
    self.class == other.class &&
      @time == other.time &&
      @flow_rate == other.flow_rate &&
      @percentages == other.percentages
  end
end
170
+
171
+
data/lib/ms/msrun.rb ADDED
@@ -0,0 +1,209 @@
1
+
2
+ require 'ms/scan'
3
+ require 'ms/parser'
4
+ require 'ms/msrun_index'
5
+ require 'ms/converter/mzxml'
6
+
7
+ #require 'ms/parser/mzxml'
8
+ #require 'ms/parser/mzdata'
9
+
10
module MS; end

# A mass spectrometry run: an ordered list of scans plus a little metadata.
class MS::MSRun

  attr_accessor :start_time, :end_time
  attr_accessor :scans
  # (just for reference) the type of file this is (as symbol)
  attr_accessor :filetype
  # (just for reference) the version string of this type of file
  attr_accessor :version
  # the total number of scans
  attr_writer :scan_count

  # With a file, determines its type/version through MS::Parser and lets the
  # matching parser fill in this object; with no file, creates an empty run.
  # OPTIONS (passed on to the parser, along with :msrun => self):
  #     :spectra => *true|false   # whether to parse out spectra
  # [note: precursor intensities not guaranteed to exist unless :spectra == true]
  def initialize(file=nil, opts={})
    myopts = opts.dup
    myopts[:msrun] = self
    if file
      filetype_and_version = MS::Parser.filetype_and_version(file)
      MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
      (@filetype, @version) = filetype_and_version
    end
  end

  # returns an array indexed by ms level giving the number of scans at that
  # level: [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2, ...
  # (levels with no scans below the highest level present are nil)
  def scan_counts
    counts = [0]
    scans.each do |sc|
      level = sc.ms_level
      counts[level] = (counts[level] || 0) + 1
      counts[0] += 1
    end
    counts
  end

  # mslevel == 0: the total number of scans (the value set through
  # scan_count= if any, otherwise the size of scans)
  # otherwise: the number of scans with that ms level
  def scan_count(mslevel=0)
    return(@scan_count || (scans && scans.size)) if mslevel == 0
    scans.count { |sc| sc.ms_level == mslevel }
  end

  # Returns [lowest m/z, highest m/z] over the scans of the given ms level.
  #
  # For level 1 (where scans are expected to cover the same range), if the
  # first level-1 scan has start_mz and end_mz they are returned as is.
  # Otherwise every scan of the level contributes its start_mz/end_mz when
  # it has both, or else the first and last m/z of its spectrum, and the
  # result is [min rounded down to nearest int, max rounded up to nearest
  # int].
  #
  # Returns nil when no scan of that level provides an m/z range.
  # NOTE(review): spectrum m/z values are assumed to be in ascending order
  # (first = lowest, last = highest) -- confirm against MS::Spectrum.
  def start_and_end_mz(mslevel=1)
    level_scans = scans.select { |sc| sc.ms_level == mslevel }
    return nil if level_scans.empty?

    if mslevel == 1
      first_scan = level_scans.first
      if first_scan.start_mz && first_scan.end_mz
        return [first_scan.start_mz, first_scan.end_mz]
      end
    end

    lo_mz = nil
    hi_mz = nil
    level_scans.each do |sc|
      (lo, hi) = mz_bounds(sc)
      next unless lo && hi
      lo_mz = lo if lo_mz.nil? || lo < lo_mz
      hi_mz = hi if hi_mz.nil? || hi > hi_mz
    end
    return nil unless lo_mz && hi_mz
    [lo_mz.floor, hi_mz.ceil]
  end

  # returns an array of precursor mz by scan number
  # returns only the m/z of the FIRST precursor if multiple
  # (nil for scans without precursors)
  def precursor_mz_by_scan_num
    by_num = Array.new(@scans.size + 1)
    @scans.each do |scan|
      prec = (scan.precursors || []).first
      by_num[scan.num] = prec ? prec.mz : nil
    end
    by_num
  end

  # returns an array of times and parallel array of spectra objects.
  # ms_level = 0  then all spectra and times
  # ms_level = 1 then all spectra of ms_level 1
  def times_and_spectra(ms_level=0)
    chosen =
      if ms_level == 0
        @scans
      else
        @scans.select { |scan| scan.ms_level == ms_level }
      end
    [chosen.map { |scan| scan.time }, chosen.map { |scan| scan.spectrum }]
  end

  # same as the instance method (creates an object without spectrum and calls
  # instance method of the same name)
  def self.precursor_mz_by_scan_num(file)
    self.new(file, :spectra => false).precursor_mz_by_scan_num
  end

  # Sets the parent (index 2) of every precursor of every scan with
  # ms_level > 1 to the most recent scan of a lower level.
  # only adds the parent if one is not already present!
  # With add_intensities, also sets the precursor intensity (index 1) from
  # the parent's spectrum at the precursor m/z (index 0); precursors that
  # end up without a parent are left untouched.
  # Returns scans.
  def self.add_parent_scan(scans, add_intensities=false)
    return scans if scans.empty?
    prev_scan = nil
    parent_stack = [nil]
    ## we want to set the level to be the first mslevel we come to
    prev_level = scans.first.ms_level
    scans.each do |scan|
      level = scan.ms_level
      if prev_level < level
        # going deeper: the previous scan is the parent of this one
        parent_stack.unshift prev_scan
      elsif prev_level > level
        # coming back up: forget one parent per level climbed
        (prev_level - level).times { parent_stack.shift }
      end
      if level > 1
        (scan.precursors || []).each do |precursor|
          precursor[2] ||= parent_stack.first
          parent = precursor[2]
          if add_intensities && parent
            precursor[1] = parent.spectrum.intensity_at_mz(precursor[0])
          end
        end
      end
      prev_level = level
      prev_scan = scan
    end
  end

  private

  # [low m/z, high m/z] for one scan: its start_mz/end_mz attributes when
  # both are present, otherwise the first/last m/z of its spectrum; nil if
  # neither is available
  def mz_bounds(scan)
    return [scan.start_mz, scan.end_mz] if scan.start_mz && scan.end_mz
    mz = scan.spectrum && scan.spectrum.mz
    return nil if mz.nil? || mz.empty?
    [mz.first, mz.last]
  end

end
207
+
208
+
209
+
@@ -1,11 +1,7 @@
1
+ require 'ms/scan'
2
+ require 'ms/parser'
1
3
 
2
- require 'spec/scan'
3
- require 'spec/mzxml/parser'
4
- require 'spec/mzdata/parser'
5
-
6
- module Spec; end
7
-
8
- class Spec::MSRunIndex
4
+ class MS::MSRunIndex
9
5
  # basename_noext is the base name of the file (with NO extensions)
10
6
  attr_accessor :scans_by_num
11
7
  attr_reader :basename_noext
@@ -32,7 +28,7 @@ class Spec::MSRunIndex
32
28
  # index_file has one row for each scan:
33
29
  # ms_level scan_num time [prec_mz prec_inten]
34
30
  # also consider getting this data directly from the mzXML file
35
- # via the Spec::MzXML::Parser.get_msrun_index command
31
+ # via the MS::MzXML::Parser.get_msrun_index command
36
32
  def set_from_index_file(index_file)
37
33
  self.basename_noext = index_file
38
34
  @scans_by_num = []
@@ -41,7 +37,7 @@ class Spec::MSRunIndex
41
37
  next if line !~ /\d/ || line =~ /^#/
42
38
  line.chomp!
43
39
  arr = line.split(" ")
44
- scan = Spec::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
40
+ scan = MS::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
45
41
  if scan.ms_level > 1
46
42
  scan.prec_mz = arr[3].to_f
47
43
  scan.prec_inten = arr[4].to_f
@@ -49,7 +45,7 @@ class Spec::MSRunIndex
49
45
  @scans_by_num[scan.num] = scan
50
46
  end
51
47
  end
52
- Spec::Scan.add_parent_scan(@scans_by_num)
48
+ MS::Scan.add_parent_scan(@scans_by_num)
53
49
  end
54
50
 
55
51
  # Takes a .mzXML file or .timeIndex file (currently)
@@ -73,7 +69,7 @@ class Spec::MSRunIndex
73
69
  # returns a new
74
70
  def set_from_mzxml(file)
75
71
  self.basename_noext = file
76
- @scans_by_num = Spec::MzXML::Parser.new.scans_by_num(file)
72
+ @scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
77
73
  end
78
74
 
79
75
  # writes the index to filename
@@ -109,33 +105,4 @@ class Spec::MSRunIndex
109
105
  end
110
106
 
111
107
 
112
- class Spec::MSRun
113
-
114
- # scan_count is an array [0] is all the scans, [1] is mslevel 1, [2] is mslevel 2, etc
115
- attr_accessor :scan_count, :start_time, :end_time, :start_mz, :end_mz
116
-
117
- # returns an array indexed by scan number where the precursor mz is recorded
118
- # for each fragment (ms2) ion
119
- # The precursor mz will be a String!
120
- def self.precursor_mz_by_scan(file)
121
- extname = File.extname(file)
122
- case extname
123
- when '.mzXML' || '.timeIndex'
124
- klass = Spec::MzXML::Parser
125
- when '.xml'
126
- klass = Spec::MzData::Parser
127
- when '' # they want us to figure out the right extension
128
- if File.exist? file + '.xml'
129
- klass = Spec::MzData::Parser
130
- else
131
- # This will cover .timeIndex, .mzXML and .RAW
132
- klass = Spec::MzXML::Parser
133
- end
134
- else
135
- abort "files of extension #{extname} are not currently supported"
136
- end
137
- klass.new.precursor_mz_by_scan(file)
138
- end
139
-
140
- end
141
108
 
@@ -0,0 +1,12 @@
1
+ require 'ms/parser/mzdata/dom'
2
+
3
# mzData DOM parser variant that uses AXML to read the file.
class MS::Parser::MzData::AXML < MS::Parser::MzData::DOM
  # Parses file with the top-level ::AXML library and returns whatever
  # ::AXML.parse_file gives back.
  def get_root_node_from_file(file)
    root_node = ::AXML.parse_file(file)
    root_node
  end
end
8
+
9
+
10
+
11
+
12
+