mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,253 +0,0 @@
1
-
2
- require 'xml_style_parser'
3
- require 'ms/spectrum'
4
- require 'ms/scan'
5
-
6
-
7
- class MS::Parser::MzXML::Hpricot
8
- include XMLStyleParser
9
- include MS::Parser::MzXML
10
-
11
- @@scan_atts = %w(num msLevel retentionTime startMz endMz precursor spectrum)
12
-
13
- def initialize(parse_type=:msrun, version='1.0')
14
- @method = parse_type
15
- @version = version
16
- end
17
-
18
- def new_scan_from_hash(node)
19
- scan = MS::Scan.new # array class creates one with 9 positions
20
- scan[0] = node['num'].to_i
21
- scan[1] = node['msLevel'].to_i
22
- scan[2] = node['retentionTime'][2...-1].to_f
23
- if x = node['startMz']
24
- scan[3] = x.to_f
25
- scan[4] = node['endMz'].to_f
26
- end
27
- scan
28
- end
29
-
30
- # takes a scan node and creates a scan object
31
- # the parent scan is the one directly above it in mslevel
32
- # if the
33
- def create_scan(scan_n, scans_by_num, get_spectra=true)
34
- if @version < '3.0'
35
- scan = new_scan_from_hash(scan_n)
36
- precs = []
37
- scan_n.each_child do |node|
38
- case node.name
39
- when 'precursorMz'
40
- # should be able to do this!!!
41
- #scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
42
- prec = MS::Precursor.new
43
- prec[1] = node['precursorIntensity'].to_f
44
- prec[0] = node.content.to_f
45
- if x = node['precursorScanNum']
46
- prec[2] = scans_by_num[x.to_i]
47
- end
48
- precs << prec
49
- when 'peaks'
50
- next unless get_spectra
51
- # SHOULD be able to do this!!
52
- #peaks_n = scan_n.find_first('child::peaks')
53
- scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
54
- end
55
- end
56
- scan[5] = precs
57
- scan
58
- else # for version > 3.0
59
- abort 'not supporting version 3.0 just yet'
60
- # note that mzXML version 3.0 *can* have more than one peak...
61
- # I'm not sure how to deal with that since I have one spectrum/scan
62
- end
63
- end
64
-
65
-
66
- # returns an array of msrun objects
67
- def msruns(file)
68
- raise NotImplementedError
69
- end
70
-
71
- # returns the file contents as a string with doubled </scan></scan> tags
72
- # collapsed into one and a missing </scan> tag inserted after </peaks>
73
- # we do this in windows style since these are generated off a windows
74
- # machine only
75
- def fix_bad_scan_tags(file)
76
- IO.read(file).gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
77
- end
78
-
79
- # right now cannot parse multiple runs out of an mzXML version 2 file since
80
- # this is built around a single run per file
81
- # OPTIONS:
82
- # :msrun => MSRun # use this object instead of creating one
83
- # :spectra => *true|false # if false don't get spectra
84
- def msrun(file, opts={})
85
- unless opts.key?(:spectra)
86
- opts[:spectra] = true
87
- end
88
-
89
- msrun_obj =
90
- if x = opts[:msrun]
91
- msrun_obj = x
92
- else
93
- MS::MSRun.new
94
- end
95
-
96
- doc = File.open(file) {|fh| ::Hpricot.XML(fh) }
97
- #if @version == '2.0'
98
- # # may not be necessary in hpricot!
99
- # #string = fix_bad_scan_tags(file)
100
- # #XML::Parser.string(string).parse
101
- #else
102
- # XML::Document.file(file)
103
- #end
104
- msrun_n = doc.at('msRun')
105
-
106
- ## HEADER
107
- scan_count = msrun_n['scanCount'].to_i
108
- msrun_obj.scan_count = scan_count
109
- scans_by_num = Array.new(scan_count + 1)
110
-
111
- ## SPECTRUM
112
- parent = nil
113
- scans = Array.new( scan_count )
114
- scn_index = 0
115
-
116
- # we should be able to do this, but it's not working!!!
117
- #scan_n = msrun_n.find_first('scan')
118
- #while (scn_index < scan_count)
119
- get_spectra = opts[:spectra]
120
-
121
- msrun_n.each_child do |scan_n|
122
- p scan_n
123
- abort
124
-
125
- next unless scan_n.name == 'scan'
126
- scan = create_scan(scan_n, scans_by_num, get_spectra)
127
- scans[scn_index] = scan
128
- sc = scan_n.next
129
- scans_by_num[scan[0]] = scan
130
- scn_index += 1
131
- end
132
-
133
-
134
- ## update the scan's parents
135
- MS::MSRun.add_parent_scan(scans)
136
-
137
- # note that startTime and endTime are optional AND in >2.2 are dateTime
138
- # instead of duration types!, so we will just use scan times...
139
- # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
140
- # export. They give the start and end time in seconds, but they are
141
- # really minutes. All the more reason to use the first and last scans!
142
- msrun_obj.start_time = scans.first.time
143
- msrun_obj.end_time = scans.last.time
144
-
145
- msrun_obj.scans = scans
146
- end
147
-
148
- end
149
-
150
-
151
-
152
- =begin
153
- ## THIS IS THE SAX PARSER VERSION. IT NEEDS A BIT OF BRUSH UP AND IT WOULD
154
- ## WORK. I THINK THE default guy is probably faster
155
-
156
- def msrun(file, msrun_obj)
157
- # Figure out where the first scan is at in the file:
158
- pos_after_first_scan = nil
159
- File.open(file) do |fh|
160
- fh.each do |line|
161
- if line =~ /<scan/
162
- pos_after_first_scan = fh.pos
163
- end
164
- end
165
- end
166
-
167
- # Get only the header:
168
- header_string = IO.read(file, pos_after_first_scan)
169
-
170
- @msrun_obj = msrun_obj
171
- # Parse out the header info:
172
- parser = XML::SaxParser.new
173
- parser.string = header_string
174
- parser.on_start_element do |name, attrs|
175
- if name == 'msRun'
176
- @msrun_obj.scan_count = attrs['scanCount'].to_i
177
- @msrun_obj.start_time = attrs['startTime'][2...-1].to_f
178
- @msrun_obj.end_time = attrs['endTime'][2...-1].to_f
179
- end
180
- end
181
- parser.parse
182
-
183
-
184
- # Parse the scans out:
185
- scan_st = 'scan'
186
- prec_st = 'precursorMz'
187
- peaks_st = 'peaks'
188
- prec_inten_st = 'precursorIntensity'
189
- precision_st = 'precision'
190
-
191
- #parser = MS::Parser::MzXML::Hpricot::SaxParser::MSRun.new
192
- parser = XML::SaxParser.new
193
- parser.filename = file
194
- parser.on_start_document do
195
- @scans = []
196
- @current_scan = nil
197
- @get_peaks = false
198
- @get_prec_mz = false
199
- end
200
-
201
- parser.on_characters do |chars|
202
- if @get_peaks
203
- @get_peaks << chars
204
- elsif @get_prec_mz
205
- @get_prec_mz << chars
206
- end
207
- end
208
-
209
- parser.on_end_element do |el|
210
- case el
211
- when 'peaks'
212
- @current_scan.spectrum = Spectrum.from_base64_peaks(@get_peaks, @precision, true)
213
- @get_peaks = false
214
- when 'precursorMz'
215
- @current_scan[5] = [Precursor.new([@get_prec_mz.to_f])]
216
- @get_prec_mz = false
217
- end
218
- end
219
-
220
- parser.on_start_element do |name, attr_hash|
221
- case name
222
- when scan_st
223
- @current_scan = new_scan_from_hash(attr_hash)
224
- sz = @scans.size
225
- @scans << @current_scan
226
- when prec_st
227
- @current_scan[5].first[1] = attr_hash[prec_inten_st].to_f
228
- @get_prec_mz = ''
229
- when peaks_st
230
- @precision = attr_hash[precision_st].to_i
231
- case @version[0,1].to_ip
232
- when 3
233
- if ch['pairOrder'] != 'm/z-int' # only version 3.0 has others
234
- abort "cannot yet read anything but 'm/z-int' pair order"
235
- end
236
- end
237
- @get_peaks = ''
238
- end
239
- end
240
- parser.parse
241
-
242
- @msrun_obj.scans = @scans
243
- @msrun_obj.scans.each_with_index do |sc,i|
244
- if sc.spectrum.mz == nil
245
- abort "INDEX: #{i}"
246
- end
247
- end
248
- @msrun_obj
249
- end
250
- =end
251
-
252
-
253
-
@@ -1,19 +0,0 @@
1
-
2
- require 'ms/parser/mzxml/dom'
3
-
4
- class MS::Parser::MzXML::LibXML < MS::Parser::MzXML::DOM
5
- def get_root_node_from_string(string)
6
- XML::Parser.string(string).parse.root
7
- end
8
- def get_root_node_from_file(file)
9
- XML::Parser.filename(file).parse.root
10
- end
11
- def get_root_node_from_io(io)
12
- XML::Parser.io(io).parse.root
13
- end
14
-
15
- end
16
-
17
-
18
-
19
-
@@ -1,122 +0,0 @@
1
- require 'strscan'
2
-
3
- module MS::Parser::MzXML ; end
4
-
5
- class MS::Parser::MzXML::Regexp
6
- @@first_scan_regexp = /<scan /o
7
- include MS::Parser::MzXML
8
-
9
- def initialize(method=:msrun, version='1.0')
10
- @method = method
11
- @version = version
12
- end
13
-
14
- def parse(file)
15
- send(@method, file)
16
- end
17
-
18
- # returns a MS::MsRun Object
19
- def msrun(file)
20
- fh = File.open(file)
21
- get_header(fh)
22
-
23
- fh.close
24
- end
25
-
26
- #def msrun(file, opts={})
27
- #end
28
-
29
- @@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo
30
- def self.precursor_mz_and_intensity_by_scan(file)
31
- prec_re = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
32
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures.reverse}
33
- end
34
-
35
- # (array will likely start at 1!)
36
- def self.by_scan_num(file, regex)
37
- arr = []
38
- File.open(file) do |fh|
39
- string = fh.read
40
- matches = string.scan(@@scan_re)
41
- matches.each do |matched|
42
- if inner_match = regex.match(matched[1])
43
- index = matched[0].to_i
44
- arr[index] = yield(inner_match)
45
- end
46
- end
47
- end
48
- arr
49
- end
50
-
51
- # Returns array where array[scan_num] = precursorMz
52
- # Parent scans are not arrayed
53
- # Values are strings. Array index likely starts at 1!
54
- # @TODO: replace the use of a yield block
55
- def self.precursor_mz_by_scan(file)
56
- prec_re = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
57
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures[0]}
58
- end
59
-
60
- end
61
-
62
-
63
- class MS::Parser::MzXML::Regexp::MsRun
64
- @@scan_count_regexp = /scanCount="(\d+)"/o
65
- @@start_time_regexp = /startTime="PT([\d\.]+)S"/o
66
- @@end_time_regexp = /endTime="PT([\d\.]+)S"/o
67
- @@first_scan_regexp = /<scan /
68
-
69
- def initialize(version='1.0')
70
- @version = version
71
- end
72
-
73
- def parse(io, msrun_object)
74
- atts = {}
75
- [:scan_count, :start_time, :end_time].zip(get_header_info(io)) {|v,k| atts[k] = v }
76
- ###
77
- # HERE <------------------------------------
78
- abort "NEED TO FINISH WRITING SCANS EXTRACTOR!"
79
- get_scans(io)
80
- # HERE <------------------------------------
81
-
82
- # set the attributes
83
- atts.each do |k,v|
84
- msrun_object.send(k,v)
85
- end
86
- # need to fill in the scan_counts array
87
- end
88
-
89
- # assumes the attributes are each on a line
90
- def get_scans(io)
91
- io.each do |line|
92
- end
93
- end
94
-
95
- # returns [total_num_scans, start_time, end_time] and positions the handle
96
- # so that the next 'gets' will read the first scan line
97
- def get_header_info(io)
98
- scan_count = nil
99
- start_time = nil
100
- end_time = nil
101
-
102
- previous_position = nil
103
- io.each do |line|
104
- if line =~ @@scan_count_regexp
105
- scan_count = $1.dup
106
- end
107
- if line =~ @@start_time_regexp
108
- start_time = $1.dup
109
- end
110
- if line =~ @@end_time_regexp
111
- end_time = $1.dup
112
- end
113
- if line =~ @@first_scan_regexp
114
- io.pos = previous_position
115
- break
116
- end
117
- previous_position = io.pos
118
- end
119
- [scan_count, start_time, end_time]
120
- end
121
-
122
- end
@@ -1,72 +0,0 @@
1
- require 'rexml/document'
2
- require 'rexml/streamlistener'
3
-
4
- module MS::Parser::MzXML::REXMLStreamListener; end
5
- class MS::Parser::MzXML::REXMLStreamListener::PrecMzByNum; end
6
-
7
- module REXMLStreamListenerHelper
8
- def parse_and_report(file, const, report_method=:report)
9
- listener = self.const_get(const).new
10
- File.open(file) do |fh|
11
- REXML::Document.parse_stream(fh, listener)
12
- end
13
- listener.send(report_method)
14
- end
15
- end
16
-
17
- class MS::Parser::MzXML::REXML
18
- include MS::Parser::MzXML
19
-
20
- def initialize(version='1.0', method=:msrun)
21
- @version = version
22
- @method = parse_type
23
- end
24
-
25
- # returns an array indexed by scan_num that gives the precursor_mz
26
- def precursor_mz_by_scan(file, opts={})
27
- parse_and_report(file, PrecMzByNum)
28
- end
29
-
30
- end
31
-
32
-
33
-
34
-
35
- # for REXML
36
- class MS::Parser::MzXML::REXML::PrecMzByNum
37
- include REXML::StreamListener
38
-
39
- attr_accessor :prec_mz
40
- alias_method :report, :prec_mz
41
-
42
- def initialize
43
- @prec_mz = []
44
- @scan_num = nil
45
- @get_data = false
46
- end
47
-
48
- def tag_start(name,attrs)
49
- if name == "scan"
50
- @scan_num = attrs["num"].to_i
51
- elsif name == "precursorMz"
52
- @get_data = true
53
- end
54
- end
55
-
56
- def tag_end(name)
57
- if name == "precursorMz"
58
- @get_data = false
59
- end
60
- end
61
-
62
- def text(txt)
63
- if @get_data
64
- @prec_mz[@scan_num] = txt
65
- end
66
- end
67
-
68
- end
69
-
70
-
71
-
72
-
@@ -1,248 +0,0 @@
1
- require 'xmlparser_wrapper'
2
-
3
- # this is the wrapper class
4
- class MS::Parser::MzXML::XMLParser
5
- include XMLStyleParser
6
- include MS::Parser::MzXML
7
- include XMLParserWrapper
8
-
9
- def initialize(parse_type=:msrun, version='1.0')
10
- @method = parse_type
11
- @version = version
12
- end
13
-
14
- # returns: [times_arr, [m/z,inten,m/z,inten...]]
15
- # where times are time strings (in seconds)
16
- def times_and_spectra(file, opts={})
17
- parse_and_report(file, 'TimesAndSpectra')
18
- end
19
-
20
-
21
- ## IN PROGRESS ...
22
- # opts is actually the msrun object that will be fleshed out in the parsing
23
- def msrun(file, opts={})
24
- p opts
25
- fh = File.open(file)
26
- reply = parse_and_report_io(fh, 'MsRunHeader')
27
- p reply
28
- abort
29
- fh.close
30
- end
31
-
32
- def prec_mz_by_scan_num(file, opts={})
33
- end
34
-
35
- # could easily do this for all these guys
36
- #def method_missing(*args)
37
- # method = args.shift
38
- # parse_and_report(
39
- #end
40
-
41
- end
42
-
43
- class MS::Parser::MzXML::XMLParser::MsRunHeader < XMLParser
44
- def initialize(version='1.0')
45
- @version = version
46
- @atts = []
47
- end
48
-
49
- def startElement(name,attrs)
50
- case name
51
- when 'msRun'
52
- @atts = attrs.values_at(%w(scanCount startTime endTime))
53
- end
54
- end
55
-
56
- def endElement(name)
57
- if name == 'dataProcessing'
58
- done
59
- reset
60
- end
61
- end
62
- end
63
-
64
- class MS::Parser::MzXML::XMLParser::Spectrum < XMLParser
65
- @@scan_atts = %w(num msLevel retentionTime startMz endMz)
66
- @@precursor_mz_atts = %w(precursorIntensity)
67
-
68
-
69
- def initialize(version='1.0')
70
- @version = version
71
- @spectrum = []
72
- @current_scan = nil
73
- end
74
-
75
- def report
76
- @spectrum
77
- end
78
-
79
- def startElement(name,attrs)
80
- if name == 'scan'
81
- vals = attrs.values_at(@@scan_atts)
82
- vals[2] = vals[2][2...-1].to_f #remove PT and trailing S
83
- [0, 1].each do |i| vals[i] = vals[i].to_i end # num and ms_level
84
- [3, 4].each do |i| vals[i] = vals[i].to_f end # start_mz and end_mz
85
- @current_scan = MS::Scan.new(vals)
86
- elsif name == 'precursorMz'
87
- # 5, 6, 7 are the scan indices for prec_mz, prec_inten and parent
88
- @current_scan[6] = attrs['precursorIntensity'].to_f
89
- @current_scan[5] = ''
90
- @get_precursor_mz = true
91
- elsif name == 'peaks'
92
- @precision = attrs['precision'].to_i
93
- @get_peaks = true
94
- @current_peaks_string = ''
95
- end
96
- end
97
-
98
- def endElement(name)
99
- if name == 'peaks'
100
- @get_peaks = false
101
- @spectrum << Spectrum.new(@current_peaks_string, @precision)
102
- @spectrum.context = @current_scan
103
- elsif name == 'precursorMz'
104
- @current_scan[5] = @current_scan[5].to_f
105
- @get_precursor_mz = false
106
- end
107
- end
108
-
109
- def character(data)
110
- if @get_peaks
111
- @current_peaks_string << data
112
- elsif @get_precursor_mz
113
- @current_scan[5] << data
114
- end
115
- end
116
-
117
- end
118
-
119
-
120
-
121
-
122
- class MS::Parser::MzXML::XMLParser::PrecMzByNum < XMLParser
123
- @scan_num = nil
124
- @get_data = false
125
-
126
- attr_accessor :prec_mz
127
- alias_method :report, :prec_mz
128
-
129
- def initialize
130
- @prec_mz = []
131
- end
132
-
133
- def startElement(name,attrs)
134
- if name == "scan"
135
- @scan_num = attrs["num"].to_i
136
- elsif name == "precursorMz"
137
- @current_prec_mz = ""
138
- @get_data = true
139
- end
140
- end
141
-
142
- def endElement(name)
143
- if name == "precursorMz"
144
- @get_data = false
145
- @prec_mz[@scan_num] = @current_prec_mz.to_f
146
- end
147
- end
148
-
149
- def character(data)
150
- if @get_data
151
- @current_prec_mz << data
152
- end
153
- end
154
-
155
- end
156
-
157
-
158
- =begin
159
-
160
-
161
- # Returns parallel arrays (times, spectra) where each spectrum is an array
162
- # containing alternating mz and intensity (MS1 scans only)
163
- # and times are strings with the time in seconds
164
- class MS::Parser::MzXML::XMLParser::TimesAndSpectra < XMLParser
165
- include MS::Parser::MzXML
166
- @@get_data = false
167
- @@get_peaks = false
168
- @@precision = 32 # @TODO: set dynamic
169
-
170
- attr_accessor :times, :spectra
171
- def times_and_spectra
172
- [@times, @spectra]
173
- end
174
-
175
- alias_method :report, :times_and_spectra
176
-
177
- def initialize(ms_level=1)
178
- @ms_level = "#{ms_level}"
179
- @times = []
180
- @spectra = []
181
- end
182
-
183
- def startElement(name,attrs)
184
- if name == "scan" && attrs["msLevel"] == @ms_level
185
- @times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
186
- @@get_peaks = true
187
- elsif name == "peaks" && @@get_peaks
188
- @@get_data = true
189
- @data = ""
190
- end
191
- end
192
-
193
- def character(data)
194
- if @@get_data
195
- @data << data
196
- end
197
- end
198
-
199
- def endElement(name)
200
- if name == "peaks" && @@get_peaks
201
- @spectra << base64_peaks_to_array(@data, @@precision)
202
- @@get_data = false
203
- @@get_peaks = false
204
- end
205
- end
206
-
207
- end
208
-
209
-
210
- class MS::Parser::MzXML::XMLParser::TimeMzIntenIndexer < XMLParser
211
-
212
- @@scan_num = nil
213
- @@get_data = false
214
-
215
- attr_accessor :scans_by_num
216
- alias_method :report, :scans_by_num
217
-
218
- def initialize
219
- @current_scan = nil
220
- @scans_by_num = []
221
- end
222
-
223
- def startElement(name,attrs)
224
- if name == "scan"
225
- num = attrs["num"].to_i
226
- @current_scan = MS::Scan.new(num, attrs["msLevel"].to_i, attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f)
227
- scans_by_num[num] = @current_scan
228
- elsif name == "precursorMz"
229
- @current_scan.prec_inten = attrs["precursorIntensity"].to_f
230
- @@get_data = true
231
- end
232
- end
233
-
234
- def endElement(name)
235
- if name == "precursorMz"
236
- @@get_data = false
237
- end
238
- end
239
-
240
- def character(data)
241
- if @@get_data
242
- @current_scan.prec_mz = data
243
- end
244
- end
245
-
246
- end
247
-
248
- =end