mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -1,449 +0,0 @@
1
- require 'spec/msrun'
2
-
3
- begin
4
- require 'xmlparser'
5
- rescue LoadError
6
- puts "*******************************************************************"
7
- puts "WARNING: 'xmlparser' library not installed:"
8
- puts "Defaulting to REXML (slower, but guaranteed to parse correct xml)"
9
- puts "Use :parse_type => 'regex' for faster (but not guaranteed) parsing"
10
- puts "Or install 'xmlparser'!"
11
- puts "*******************************************************************"
12
- end
13
- begin
14
- $XMLParserClass = XMLParser
15
- rescue NameError
16
- $XMLParserClass = Object
17
- end
18
-
19
- require 'spec/mzxml'
20
-
21
- require 'rexml/document'
22
- require 'rexml/streamlistener'
23
-
24
- module Spec::MzXML::REXMLStreamListener; end
25
- module Spec::MzXML::PrecMzByNum; end
26
-
27
- # for REXML
28
- class Spec::MzXML::REXMLStreamListener::PrecMzByNum
29
- include REXML::StreamListener
30
-
31
- attr_accessor :prec_mz
32
-
33
- def initialize
34
- @prec_mz = []
35
- @scan_num = nil
36
- @get_data = false
37
- end
38
-
39
- def tag_start(name,attrs)
40
- if name == "scan"
41
- @scan_num = attrs["num"].to_i
42
- elsif name == "precursorMz"
43
- @get_data = true
44
- end
45
- end
46
-
47
- def tag_end(name)
48
- if name == "precursorMz"
49
- @get_data = false
50
- end
51
- end
52
-
53
- def text(txt)
54
- if @get_data
55
- @prec_mz[@scan_num] = txt
56
- end
57
- end
58
-
59
- end
60
-
61
- module Spec::MzXML::XMLParser; end
62
-
63
- class Spec::MzXML::XMLParser::PrecMzByNum < $XMLParserClass
64
- @@scan_num = nil
65
- @@get_data = false
66
-
67
- attr_accessor :prec_mz
68
- def initialize
69
- @prec_mz = []
70
- end
71
-
72
- def startElement(name,attrs)
73
- if name == "scan"
74
- @@scan_num = attrs["num"].to_i
75
- elsif name == "precursorMz"
76
- @prec_mz[@@scan_num] = ""
77
- @@get_data = true
78
- end
79
- end
80
-
81
- def endElement(name)
82
- if name == "precursorMz"
83
- @@get_data = false
84
- end
85
- end
86
-
87
- def character(data)
88
- if @@get_data
89
- @prec_mz[@@scan_num] << data
90
- end
91
- end
92
-
93
- end
94
-
95
-
96
- # Returns parallel arrays (times, spectra) where each spectra is an array
97
- # containing alternating mz and intensity (MS1 scans only)
98
- # and times are strings with the time in seconds
99
- class Spec::MzXML::XMLParser::TimesAndSpectra < $XMLParserClass
100
- include Spec::MzXML
101
- @@get_data = false
102
- @@get_peaks = false
103
- @@precision = 32 # @TODO: set dynamic
104
-
105
- attr_accessor :times, :spectra
106
- def times_and_spectra
107
- [@times, @spectra]
108
- end
109
-
110
- def initialize(ms_level=1)
111
- @ms_level = "#{ms_level}"
112
- @times = []
113
- @spectra = []
114
- end
115
-
116
- def startElement(name,attrs)
117
- if name == "scan" && attrs["msLevel"] == @ms_level
118
- @times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
119
- @@get_peaks = true
120
- elsif name == "peaks" && @@get_peaks
121
- @@get_data = true
122
- @data = ""
123
- end
124
- end
125
-
126
- def character(data)
127
- if @@get_data
128
- @data << data
129
- end
130
- end
131
-
132
- def endElement(name)
133
- if name == "peaks" && @@get_peaks
134
- @spectra << base64_peaks_to_array(@data, @@precision)
135
- @@get_data = false
136
- @@get_peaks = false
137
- end
138
- end
139
-
140
- end
141
-
142
-
143
- class Spec::MzXML::Regexp
144
- @@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo
145
- def self.precursor_mz_and_intensity_by_scan(file)
146
- prec_re = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
147
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures.reverse}
148
- end
149
-
150
- # (array will likely start at 1!)
151
- def self.by_scan_num(file, regex)
152
- arr = []
153
- File.open(file) do |fh|
154
- string = fh.read
155
- matches = string.scan(@@scan_re)
156
- matches.each do |matched|
157
- if inner_match = regex.match(matched[1])
158
- index = matched[0].to_i
159
- arr[index] = yield(inner_match)
160
- end
161
- end
162
- end
163
- arr
164
- end
165
-
166
- # Returns array where array[scan_num] = precursorMz
167
- # Parent scans are not arrayed
168
- # Values are strings. Array index likely starts at 1!
169
- # @TODO: replace the use of a yield block
170
- def self.precursor_mz_by_scan(file)
171
- prec_re = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
172
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures[0]}
173
- end
174
-
175
- end
176
-
177
-
178
- class Spec::MzXML::Parser
179
-
180
- def default_parser
181
- xmlparser = false
182
- $".each do |lib|
183
- if lib =~ /xmlparser/
184
- xmlparser = true
185
- end
186
- end
187
- if xmlparser
188
- return "xmlparser"
189
- else
190
- return "rexml"
191
- end
192
- end
193
-
194
- def initialize(file=nil, parse_type=:parse, *args)
195
- if file
196
- send(parse_type, file, *args)
197
- end
198
- end
199
-
200
- # Parse into a complete object structure (REXML??)
201
- def parse(file)
202
- # @TODO: write complete parser
203
- puts "need to write this guy!!!!"
204
- exit
205
- end
206
-
207
- # returns: [times_arr, [m/z,inten,m/z,inten...]]
208
- # where times are time strings (in seconds)
209
- def times_and_spectra(file)
210
- parser = Spec::MzXML::XMLParser::TimesAndSpectra.new
211
- parser.parse(IO.read(file))
212
- parser.times_and_spectra
213
- end
214
-
215
- # Returns an array of scans indexed by scan number
216
- # NOTE that the first scan (zero indexed) will likely be nil!
217
- # accepts an optional parse_type = 'xmlparser' | 'rexml'
218
- def scans_by_num(mzXML_file, parse_type=nil)
219
- unless parse_type
220
- parse_type = default_parser
221
- end
222
- scans = []
223
- case parse_type
224
- when 'xmlparser'
225
- parser = Spec::MzXML::XMLParser::TimeMzIntenIndexer.new
226
- parser.parse(IO.read(mzXML_file))
227
- scans = parser.scans_by_num
228
- when 'rexml' # use REXML
229
- # This is really too slow for files of this size
230
- doc = REXML::Document.new File.new(mzXML_file)
231
- doc.elements.each('msRun/scan') do |scan|
232
- rt = scan.attributes['retentionTime'] ## like PT0.154000S"
233
- level = scan.attributes['msLevel']
234
- to_print = []
235
- prec_mz = nil
236
- prec_int = nil
237
- if level.to_i != 1
238
- scan.elements.each("precursorMz") do |prec|
239
- prec_mz = prec.text.to_f
240
- prec_int = prec.attributes["precursorIntensity"].to_f
241
- end
242
- end
243
- # remove the leading PT and trailing S on the retention time!
244
- rt = rt[2...-1]
245
-
246
- num = scan.attributes['num'].to_i
247
- scans[num] = Spec::Scan.new(num, scan.attributes['msLevel'].to_i, rt.to_f, prec_mz, prec_int)
248
- end #doc.elements
249
- else
250
- throw ArgumentError, "invalid parse type: #{parse_type}"
251
- end
252
- ## update the scans for parents
253
- Spec::Scan.add_parent_scan(scans)
254
- scans
255
- end
256
-
257
- # Returns a Hash indexed by filename (with no extension) for a given path
258
- # extension = glob (string) or regex
259
- # The basename is given as: file.split('.').first
260
- def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
261
- hash = {}
262
- Dir.chdir path do
263
- files = []
264
- if extension.class == String
265
- files = Dir[extension]
266
- elsif extension.class == Regexp
267
- files = Dir.entries(".").find_all do |dir|
268
- dir =~ extension
269
- end
270
- else
271
- puts "extension: #{extension} not a String or Regexp!"
272
- end
273
- files.each do |file|
274
- base = file.split('.').first
275
- hash[base] = precursor_mz_by_scan(file, parse_type)
276
- end
277
- end
278
- hash
279
- end
280
-
281
- # Returns hash where hash[scan_num] = [precursorMz, precursorIntensity]
282
- # Parent scans are not hashed
283
- # Keys and values are both strings
284
- def precursor_mz_and_inten_by_scan(file)
285
- # in progress
286
- end
287
-
288
- def get_prec_mz_by_scan_for_time_index(file)
289
- index = Spec::MSRunIndex.new(file)
290
- prec_mz_by_scan = index.scans_by_num.collect do |scan|
291
- if scan ; scan.prec_mz
292
- else ; nil
293
- end
294
- end
295
- prec_mz_by_scan
296
- end
297
-
298
- # Returns array where array[scan_num] = precursorMz
299
- # Parent scans are not arrayed
300
- # Values are strings. Array index likely starts at 1!
301
- # parse_type = "regex" | "rexml" | "xmlparser"
302
- # also takes a MSRunIndex file (terminates with '.timeIndex')
303
- # also takes .RAW or .raw files and converts them to mzXML using
304
- # Spec::MzXML::MZXML_CONVERTER
305
- # also takes a file without an extension, in which case tests to see if the
306
- # index file exists, then the .mzXML file, then .RAW/.raw (and converts)
307
- def precursor_mz_by_scan(file, parse_type=nil)
308
- # If given a time index file:
309
-
310
- if File.exist?(file + '.timeIndex')
311
- return get_prec_mz_by_scan_for_time_index(file + '.timeIndex')
312
- elsif File.exist?(file + '.mzXML.timeIndex')
313
- return get_prec_mz_by_scan_for_time_index(file + '.mzXML.timeIndex')
314
- elsif file =~ /\.timeIndex$/
315
- return get_prec_mz_by_scan_for_time_index(file)
316
- end
317
-
318
- file = Spec::MzXML.file_to_mzxml(file)
319
-
320
- unless parse_type then parse_type = default_parser end
321
- case parse_type
322
- when "xmlparser"
323
- ##XMLParser:
324
- parser = Spec::MzXML::XMLParser::PrecMzByNum.new
325
- File.open(file) do |fh|
326
- parser.parse(fh.read)
327
- end
328
- parser.prec_mz
329
- when "regex"
330
- Spec::MzXML::Regexp.precursor_mz_by_scan(file)
331
- when "rexml"
332
- listener = Spec::MzXML::REXMLStreamListener::PrecMzByNum.new
333
- REXML::Document.parse_stream(File.new(file), listener)
334
- listener.prec_mz
335
- else
336
- puts "Don't recognize parse_type: #{parse_type}"
337
- end
338
- end
339
-
340
- # Returns a hash of basic info on an mzXML run:
341
- # *mzXML_elemt* *hash keys (symbols)*
342
- # scanCount scan_count
343
- # startTime start_time
344
- # endTime end_time
345
- # startMz start_mz
346
- # endMz end_mz
347
- def basic_info(mzxml_file)
348
- puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
349
- hash = {}
350
- scan_count_tmp = []
351
- (1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
352
- @fh = File.open(mzxml_file)
353
- @line = ""
354
- scan_count_tmp[0] = _el("scanCount").to_i
355
- hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
356
- hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
357
- hash[:ms_level] = _el("msLevel").to_i
358
- scan_count_tmp[1] = 1
359
- if hash[:ms_level] == 1
360
- hash[:start_mz] = _el("startMz").to_f
361
- hash[:end_mz] = _el("endMz").to_f
362
- end
363
-
364
- while !@fh.eof?
365
- @line = @fh.readline
366
- ms_level = _el("msLevel")
367
- if ms_level
368
- scan_count_tmp[ms_level.to_i] += 1
369
- else
370
- break
371
- end
372
- end
373
- scan_count = []
374
- scan_count_tmp.each do |cnt|
375
- if cnt != 0
376
- scan_count.push cnt
377
- else
378
- break
379
- end
380
- end
381
- hash[:scan_count] = scan_count
382
- @fh.close
383
- hash
384
- end
385
-
386
- # returns [start_mz, end_mz] of the first full scan (ms_level == 1)
387
- def start_and_end_mz(mzxml_file)
388
- @fh = File.open(mzxml_file)
389
- ms_level = 0
390
- @line = ""
391
- while ms_level != 1
392
- ms_level = _el("msLevel").to_i
393
- end
394
- start_mz = _el("startMz").to_f
395
- end_mz = _el("endMz").to_f
396
- @fh.close
397
- [start_mz, end_mz]
398
- end
399
-
400
- def _el(name)
401
- re = /#{name}="(.*)"/
402
- while @line !~ re && !@fh.eof?
403
- @line = @fh.readline
404
- end
405
- if $1
406
- return $1.dup
407
- else
408
- return nil
409
- end
410
- end
411
-
412
- end
413
-
414
- class Spec::MzXML::XMLParser::TimeMzIntenIndexer < XMLParser
415
-
416
- @@scan_num = nil
417
- @@get_data = false
418
-
419
- attr_accessor :scans_by_num
420
- def initialize
421
- @current_scan = nil
422
- @scans_by_num = []
423
- end
424
-
425
- def startElement(name,attrs)
426
- if name == "scan"
427
- num = attrs["num"].to_i
428
- @current_scan = Spec::Scan.new(num, attrs["msLevel"].to_i, attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f)
429
- scans_by_num[num] = @current_scan
430
- elsif name == "precursorMz"
431
- @current_scan.prec_inten = attrs["precursorIntensity"].to_f
432
- @@get_data = true
433
- end
434
- end
435
-
436
- def endElement(name)
437
- if name == "precursorMz"
438
- @@get_data = false
439
- end
440
- end
441
-
442
- def character(data)
443
- if @@get_data
444
- @current_scan.prec_mz = data
445
- end
446
- end
447
-
448
- end
449
-
data/lib/spec/scan.rb DELETED
@@ -1,55 +0,0 @@
1
-
2
- module Spec; end
3
-
4
- class Spec::Scan
5
-
6
- attr_accessor :time, :ms_level, :num, :prec_mz, :prec_inten, :parent
7
- def initialize(num=nil, ms_level=nil, time=nil, prec_mz=nil, prec_inten=nil, parent=nil)
8
- @num = num
9
- @ms_level = ms_level
10
- @time = time
11
- if prec_mz then @prec_mz = prec_mz end
12
- if prec_inten then @prec_inten = prec_inten end
13
- if parent then @parent = parent end
14
- end
15
-
16
- def to_s
17
- "<Scan num=#{@num} ms_level=#{@ms_level} time=#{@time}>"
18
- end
19
-
20
- # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
21
- def to_index_file_string
22
- arr = [@ms_level, @num, @time]
23
- if prec_mz then arr << @prec_mz end
24
- if prec_inten then arr << @prec_inten end
25
- arr.join(" ")
26
- end
27
-
28
- # adds the attribute parent to each scan with a parent
29
- # (level 1 = no parent; level 2 = prev level 1, etc.
30
- def self.add_parent_scan(scans)
31
- prev_scan = nil
32
- parent_stack = [nil]
33
- ## we want to set the level to be the first mslevel we come to
34
- prev_level = 1
35
- scans.each do |scan|
36
- if scan then prev_level = scan.ms_level; break; end
37
- end
38
- scans.each do |scan|
39
- next unless scan ## the first one is nil, (others?)
40
- level = scan.ms_level
41
- if prev_level < level
42
- parent_stack.unshift prev_scan
43
- end
44
- if prev_level > level
45
- (prev_level - level).times do parent_stack.shift end
46
- end
47
- scan.parent = parent_stack.first
48
- prev_level = level
49
- prev_scan = scan
50
- end
51
- end
52
-
53
- end
54
-
55
-