mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -1,449 +0,0 @@
1
- require 'spec/msrun'
2
-
3
# Load the optional 'xmlparser' (expat) bindings.  When they are missing the
# library still loads and a notice explains the slower alternatives.
begin
  require 'xmlparser'
rescue LoadError
  # The notice goes to STDERR so it can never mix with data that command-line
  # tools built on this library write to STDOUT.
  $stderr.puts "*******************************************************************"
  $stderr.puts "WARNING: 'xmlparser' library not installed:"
  $stderr.puts "Defaulting to REXML (slower, but guaranteed to parse correct xml)"
  $stderr.puts "Use :parse_type => 'regex' for faster (but not guaranteed) parsing"
  $stderr.puts "Or install 'xmlparser'!"
  $stderr.puts "*******************************************************************"
end

# Superclass used by the expat-based handler classes: the real XMLParser when
# it was loaded, otherwise Object so that those class definitions still
# evaluate.
$XMLParserClass = defined?(XMLParser) ? XMLParser : Object
18
-
19
- require 'spec/mzxml'
20
-
21
- require 'rexml/document'
22
- require 'rexml/streamlistener'
23
-
24
module Spec::MzXML::REXMLStreamListener; end
module Spec::MzXML::PrecMzByNum; end

# REXML stream listener that records the text of each <precursorMz> element
# in an array indexed by the "num" attribute of the most recent <scan>.
class Spec::MzXML::REXMLStreamListener::PrecMzByNum
  include REXML::StreamListener

  # Array where prec_mz[scan_num] is the precursorMz text (a String).
  attr_accessor :prec_mz

  def initialize
    @prec_mz = []
    @scan_num = nil
    @get_data = false
  end

  # Remembers the current scan number and starts capturing text when a
  # precursorMz element opens.
  def tag_start(name, attrs)
    case name
    when "scan"        then @scan_num = attrs["num"].to_i
    when "precursorMz" then @get_data = true
    end
  end

  # Stops capturing once the precursorMz element closes.
  def tag_end(name)
    @get_data = false if name == "precursorMz"
  end

  # Stores text seen while inside a precursorMz element.
  def text(txt)
    @prec_mz[@scan_num] = txt if @get_data
  end

end
60
-
61
module Spec::MzXML::XMLParser; end

# Event handler (startElement / endElement / character callbacks) that
# collects the text of each <precursorMz> element in an array indexed by the
# "num" attribute of the most recent <scan>.
#
# Parse state lives in instance variables so that every parser object is
# independent; class variables would be shared by all instances and would
# carry over from one parse to the next.
class Spec::MzXML::XMLParser::PrecMzByNum < $XMLParserClass

  # Array where prec_mz[scan_num] is the precursorMz text (a String).
  attr_accessor :prec_mz

  def initialize
    @prec_mz = []
    @scan_num = nil
    @get_data = false
  end

  def startElement(name, attrs)
    if name == "scan"
      @scan_num = attrs["num"].to_i
    elsif name == "precursorMz"
      # start with an empty string: character data is appended chunk by chunk
      @prec_mz[@scan_num] = ""
      @get_data = true
    end
  end

  def endElement(name)
    @get_data = false if name == "precursorMz"
  end

  def character(data)
    @prec_mz[@scan_num] << data if @get_data
  end

end
94
-
95
-
96
# Collects parallel arrays (times, spectra) for the scans whose msLevel equals
# the ms_level given to the constructor (default 1).  Each spectrum is
# whatever base64_peaks_to_array returns for the scan's <peaks> text, and each
# time is the retentionTime attribute with its first two and last characters
# removed ("PTx.xxxxS" -> "x.xxxx"), i.e. a String.
#
# Parse state is kept per instance; class variables would be shared between
# all parser objects and survive from one parse to the next.
class Spec::MzXML::XMLParser::TimesAndSpectra < $XMLParserClass
  # base64_peaks_to_array is not defined in this class; presumably it comes
  # from this mixin -- TODO confirm.
  include Spec::MzXML
  @@precision = 32 # @TODO: set dynamic

  attr_accessor :times, :spectra

  # Returns [times, spectra].
  def times_and_spectra
    [@times, @spectra]
  end

  def initialize(ms_level=1)
    # attribute values are compared as strings
    @ms_level = "#{ms_level}"
    @times = []
    @spectra = []
    @get_peaks = false  # true between a matching <scan> and the end of its <peaks>
    @get_data = false   # true while inside that <peaks> element
    @data = ""
  end

  def startElement(name, attrs)
    if name == "scan" && attrs["msLevel"] == @ms_level
      @times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
      @get_peaks = true
    elsif name == "peaks" && @get_peaks
      @get_data = true
      @data = ""
    end
  end

  def character(data)
    @data << data if @get_data
  end

  def endElement(name)
    if name == "peaks" && @get_peaks
      @spectra << base64_peaks_to_array(@data, @@precision)
      @get_data = false
      @get_peaks = false
    end
  end

end
141
-
142
-
143
# Regular-expression based extraction of per-scan values from an mzXML file.
# The whole file is read into memory and scanned; no XML parser is involved.
class Spec::MzXML::Regexp
  # captures: (1) the scan number, (2) the text up to the next </scan>
  @@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo

  # Returns array where array[scan_num] = [precursorMz, precursorIntensity]
  # (both strings) for scans matching msLevel="2".
  def self.precursor_mz_and_intensity_by_scan(file)
    prec_re = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
    by_scan_num(file, prec_re) { |md| md.captures.reverse }
  end

  # Applies regex to the body of every scan in file and stores the block's
  # result for each match at the scan's number.
  # (array will likely start at 1!)
  def self.by_scan_num(file, regex)
    by_num = []
    File.open(file) do |fh|
      fh.read.scan(@@scan_re).each do |num, body|
        hit = regex.match(body)
        next unless hit
        slot = num.to_i
        by_num[slot] = yield(hit)
      end
    end
    by_num
  end

  # Returns array where array[scan_num] = precursorMz
  # Parent scans are not arrayed
  # Values are strings. Array index likely starts at 1!
  # @TODO: replace the use of a yield block
  def self.precursor_mz_by_scan(file)
    prec_re = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
    by_scan_num(file, prec_re) { |md| md.captures[0] }
  end

end
176
-
177
-
178
# Entry point for reading values out of mzXML files (and '.timeIndex' index
# files) using one of several strategies: the expat-based 'xmlparser', REXML,
# or plain regular expressions.
class Spec::MzXML::Parser

  # Returns "xmlparser" if a loaded feature ($") mentions xmlparser,
  # otherwise "rexml".
  def default_parser
    $".any? { |lib| lib =~ /xmlparser/ } ? "xmlparser" : "rexml"
  end

  # If file is given, immediately calls the method named by parse_type with
  # file and any extra args.
  def initialize(file=nil, parse_type=:parse, *args)
    send(parse_type, file, *args) if file
  end

  # Parse into a complete object structure (REXML??)
  # Not implemented yet: raises NotImplementedError instead of terminating
  # the whole process.
  def parse(file)
    # @TODO: write complete parser
    raise NotImplementedError, "need to write this guy!!!!"
  end

  # returns: [times_arr, [m/z,inten,m/z,inten...]]
  # where times are time strings (in seconds)
  def times_and_spectra(file)
    parser = Spec::MzXML::XMLParser::TimesAndSpectra.new
    parser.parse(IO.read(file))
    parser.times_and_spectra
  end

  # Returns an array of scans indexed by scan number
  # NOTE that the first scan (zero indexed) will likely be nil!
  # accepts an optional parse_type = 'xmlparser' | 'rexml'
  # Raises ArgumentError for any other parse_type.
  def scans_by_num(mzXML_file, parse_type=nil)
    parse_type ||= default_parser
    scans = []
    case parse_type
    when 'xmlparser'
      parser = Spec::MzXML::XMLParser::TimeMzIntenIndexer.new
      parser.parse(IO.read(mzXML_file))
      scans = parser.scans_by_num
    when 'rexml' # use REXML
      # This is really too slow for files of this size
      # (block form so the file handle is closed once the document is built)
      doc = File.open(mzXML_file) { |fh| REXML::Document.new(fh) }
      doc.elements.each('msRun/scan') do |scan|
        level = scan.attributes['msLevel'].to_i
        prec_mz = nil
        prec_int = nil
        if level != 1
          scan.elements.each("precursorMz") do |prec|
            prec_mz = prec.text.to_f
            prec_int = prec.attributes["precursorIntensity"].to_f
          end
        end
        # remove the leading PT and trailing S on the retention time
        # (like "PT0.154000S")
        rt = scan.attributes['retentionTime'][2...-1]

        num = scan.attributes['num'].to_i
        scans[num] = Spec::Scan.new(num, level, rt.to_f, prec_mz, prec_int)
      end
    else
      raise ArgumentError, "invalid parse type: #{parse_type}"
    end
    ## update the scans for parents
    Spec::Scan.add_parent_scan(scans)
    scans
  end

  # Returns a Hash indexed by filename (with no extension) for a given path
  # extension = glob (string) or regex
  # The basename is given as: file.split('.').first
  def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
    hash = {}
    Dir.chdir path do
      files =
        case extension
        when String
          Dir[extension]
        when ::Regexp # the core class, not Spec::MzXML::Regexp
          Dir.entries(".").select { |entry| entry =~ extension }
        else
          puts "extension: #{extension} not a String or Regexp!"
          []
        end
      files.each do |file|
        hash[file.split('.').first] = precursor_mz_by_scan(file, parse_type)
      end
    end
    hash
  end

  # Returns hash where hash[scan_num] = [precursorMz, precursorIntensity]
  # Parent scans are not hashed
  # Keys and values are both strings
  def precursor_mz_and_inten_by_scan(file)
    # in progress
  end

  # Returns array where array[scan_num] = prec_mz of that scan as read from
  # a Spec::MSRunIndex built on file (nil where the index has no scan).
  def get_prec_mz_by_scan_for_time_index(file)
    index = Spec::MSRunIndex.new(file)
    index.scans_by_num.map { |scan| scan ? scan.prec_mz : nil }
  end

  # Returns array where array[scan_num] = precursorMz
  # Parent scans are not arrayed
  # Values are strings. Array index likely starts at 1!
  # parse_type = "regex" | "rexml" | "xmlparser"
  # also takes a MSRunIndex file (terminates with '.timeIndex')
  # also takes .RAW or .raw files and converts them to mzXML using
  # Spec::MzXML::MZXML_CONVERTER
  # also takes a file without an extension, in which case tests to see if the
  # index file exists, then the .mzXML file, then .RAW/.raw (and converts)
  # An unrecognized parse_type prints a message and returns nil.
  def precursor_mz_by_scan(file, parse_type=nil)
    # If given (or sitting next to) a time index file, use it:
    if File.exist?(file + '.timeIndex')
      return get_prec_mz_by_scan_for_time_index(file + '.timeIndex')
    elsif File.exist?(file + '.mzXML.timeIndex')
      return get_prec_mz_by_scan_for_time_index(file + '.mzXML.timeIndex')
    elsif file =~ /\.timeIndex$/
      return get_prec_mz_by_scan_for_time_index(file)
    end

    file = Spec::MzXML.file_to_mzxml(file)

    parse_type ||= default_parser
    case parse_type
    when "xmlparser"
      parser = Spec::MzXML::XMLParser::PrecMzByNum.new
      parser.parse(IO.read(file))
      parser.prec_mz
    when "regex"
      Spec::MzXML::Regexp.precursor_mz_by_scan(file)
    when "rexml"
      listener = Spec::MzXML::REXMLStreamListener::PrecMzByNum.new
      # block form so the handle is closed when streaming finishes or fails
      File.open(file) { |fh| REXML::Document.parse_stream(fh, listener) }
      listener.prec_mz
    else
      puts "Don't recognize parse_type: #{parse_type}"
    end
  end

  # Returns a hash of basic info on an mzXML run:
  #     *mzXML_elemt*   *hash keys (symbols)*
  #     scanCount       scan_count
  #     startTime       start_time
  #     endTime         end_time
  #     startMz         start_mz
  #     endMz           end_mz
  # :ms_level is the msLevel of the first scan; :start_mz / :end_mz are only
  # set when that level is 1.  :scan_count is an array
  # [scanCount, count at level 1, count at level 2, ...] cut at the first
  # zero entry.
  def basic_info(mzxml_file)
    puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
    hash = {}
    scan_count_tmp = [0, 0, 0, 0, 0, 0] # [scanCount, level 1 .. level 5]
    File.open(mzxml_file) do |fh|
      # _el reads through @fh / @line
      @fh = fh
      @line = ""
      scan_count_tmp[0] = _el("scanCount").to_i
      hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
      hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
      hash[:ms_level] = _el("msLevel").to_i
      # NOTE(review): the first scan is counted as level 1 regardless of the
      # msLevel just read -- confirm that is intended.
      scan_count_tmp[1] = 1
      if hash[:ms_level] == 1
        hash[:start_mz] = _el("startMz").to_f
        hash[:end_mz] = _el("endMz").to_f
      end

      until fh.eof?
        # step past the line of the scan already counted
        @line = fh.readline
        ms_level = _el("msLevel")
        break unless ms_level
        scan_count_tmp[ms_level.to_i] += 1
      end
    end
    hash[:scan_count] = scan_count_tmp.take_while { |cnt| cnt != 0 }
    hash
  end

  # returns [start_mz, end_mz] of the first full scan (ms_level == 1)
  # returns [nil, nil] if the file holds no scan with msLevel 1
  def start_and_end_mz(mzxml_file)
    File.open(mzxml_file) do |fh|
      @fh = fh
      @line = ""
      loop do
        level = _el("msLevel")
        return [nil, nil] unless level # reached EOF without an MS1 scan
        break if level.to_i == 1
        # _el stays on a line that matches, so step past this scan's line
        # or the same msLevel would be returned forever
        return [nil, nil] if fh.eof?
        @line = fh.readline
      end
      [_el("startMz").to_f, _el("endMz").to_f]
    end
  end

  # Returns the value of the first name="value" attribute found at or after
  # the current line (@line) of @fh, or nil if the end of the file is reached
  # first.  @line is left on the matching line, so further attributes of the
  # same line can be fetched by subsequent calls.
  def _el(name)
    # [^"]* stops at the closing quote even when more attributes follow on
    # the same line
    re = /#{::Regexp.escape(name)}="([^"]*)"/
    until (found = re.match(@line))
      return nil if @fh.eof?
      @line = @fh.readline
    end
    found[1].dup
  end

end
413
-
414
# Event handler (startElement / endElement / character callbacks) that builds
# one Spec::Scan per <scan> element, stored in scans_by_num at the scan's
# number, with precursor m/z and intensity filled in from <precursorMz>.
#
# Subclasses $XMLParserClass (like the other handlers in this file) so that
# the file still loads when the 'xmlparser' library is not installed.
class Spec::MzXML::XMLParser::TimeMzIntenIndexer < $XMLParserClass

  # Array where scans_by_num[num] is the Spec::Scan with that number.
  attr_accessor :scans_by_num

  def initialize
    @current_scan = nil
    @scans_by_num = []
    # per-instance state: true while inside a <precursorMz> element
    @get_data = false
  end

  def startElement(name, attrs)
    if name == "scan"
      num = attrs["num"].to_i
      # retentionTime looks like "PT0.154000S": drop the PT prefix / S suffix
      time = attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f
      @current_scan = Spec::Scan.new(num, attrs["msLevel"].to_i, time)
      scans_by_num[num] = @current_scan
    elsif name == "precursorMz"
      @current_scan.prec_inten = attrs["precursorIntensity"].to_f
      # character data may be delivered in several chunks: start empty and
      # append, instead of keeping only the last chunk
      @current_scan.prec_mz = ""
      @get_data = true
    end
  end

  def endElement(name)
    @get_data = false if name == "precursorMz"
  end

  def character(data)
    @current_scan.prec_mz << data if @get_data
  end

end
449
-
data/lib/spec/scan.rb DELETED
@@ -1,55 +0,0 @@
1
-
2
module Spec; end

# A single scan of a run: its number, ms level, time and, optionally, the
# precursor m/z / intensity and a reference to its parent scan.
class Spec::Scan

  attr_accessor :time, :ms_level, :num, :prec_mz, :prec_inten, :parent

  # prec_mz, prec_inten and parent are only stored when given.
  def initialize(num=nil, ms_level=nil, time=nil, prec_mz=nil, prec_inten=nil, parent=nil)
    @num = num
    @ms_level = ms_level
    @time = time
    @prec_mz = prec_mz if prec_mz
    @prec_inten = prec_inten if prec_inten
    @parent = parent if parent
  end

  def to_s
    "<Scan num=#{@num} ms_level=#{@ms_level} time=#{@time}>"
  end

  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
  def to_index_file_string
    arr = [@ms_level, @num, @time]
    arr << @prec_mz if prec_mz
    arr << @prec_inten if prec_inten
    arr.join(" ")
  end

  # adds the attribute parent to each scan with a parent
  # (level 1 = no parent; level 2 = prev level 1, etc.)
  #
  # The parent of a scan is the most recent preceding scan with a lower
  # ms_level (nil if there is none).  nil entries in scans are skipped.
  # Returns scans.
  def self.add_parent_scan(scans)
    # chain of open ancestors, innermost last; ms levels strictly increase
    ancestors = []
    scans.each do |scan|
      next unless scan ## the first one is nil, (others?)
      level = scan.ms_level
      # Drop everything at the same or a deeper level.  Comparing levels
      # (rather than counting level differences) keeps the chain correct even
      # when the ms level jumps by more than one between consecutive scans.
      ancestors.pop while ancestors.last && ancestors.last.ms_level >= level
      scan.parent = ancestors.last
      ancestors.push scan
    end
  end

end
54
-
55
-