mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,253 +0,0 @@
1
-
2
- require 'xml_style_parser'
3
- require 'ms/spectrum'
4
- require 'ms/scan'
5
-
6
-
7
- class MS::Parser::MzXML::Hpricot
8
- include XMLStyleParser
9
- include MS::Parser::MzXML
10
-
11
- @@scan_atts = %w(num msLevel retentionTime startMz endMz precursor spectrum)
12
-
13
- def initialize(parse_type=:msrun, version='1.0')
14
- @method = parse_type
15
- @version = version
16
- end
17
-
18
- def new_scan_from_hash(node)
19
- scan = MS::Scan.new # array class creates one with 9 positions
20
- scan[0] = node['num'].to_i
21
- scan[1] = node['msLevel'].to_i
22
- scan[2] = node['retentionTime'][2...-1].to_f
23
- if x = node['startMz']
24
- scan[3] = x.to_f
25
- scan[4] = node['endMz'].to_f
26
- end
27
- scan
28
- end
29
-
30
- # takes a scan node and creates a scan object
31
- # the parent scan is the one directly above it in mslevel
32
- # if the
33
- def create_scan(scan_n, scans_by_num, get_spectra=true)
34
- if @version < '3.0'
35
- scan = new_scan_from_hash(scan_n)
36
- precs = []
37
- scan_n.each_child do |node|
38
- case node.name
39
- when 'precursorMz'
40
- # should be able to do this!!!
41
- #scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
42
- prec = MS::Precursor.new
43
- prec[1] = node['precursorIntensity'].to_f
44
- prec[0] = node.content.to_f
45
- if x = node['precursorScanNum']
46
- prec[2] = scans_by_num[x.to_i]
47
- end
48
- precs << prec
49
- when 'peaks'
50
- next unless get_spectra
51
- # SHOULD be able to do this!!
52
- #peaks_n = scan_n.find_first('child::peaks')
53
- scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
54
- end
55
- end
56
- scan[5] = precs
57
- scan
58
- else # for version > 3.0
59
- abort 'not supporting version 3.0 just yet'
60
- # note that mzXML version 3.0 *can* have more than one peak...
61
- # I'm not sure how to deal with that since I have one spectrum/scan
62
- end
63
- end
64
-
65
-
66
- # returns an array of msrun objects
67
- def msruns(file)
68
- raise NotImplementedError
69
- end
70
-
71
- # returns a string with double </scan></scan> tags into single and missing
72
- # </scan> tags after peaks added in
73
- # we do this in windows style since these are generated off a windows
74
- # machine only
75
- def fix_bad_scan_tags(file)
76
- IO.read(file).gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
77
- end
78
-
79
- # right now cannot parse multiple runs out of an mzXML version 2 file since
80
- # this is built around a single run per file
81
- # OPTIONS:
82
- # :msrun => MSRun # use this object instead of creating one
83
- # :spectra => *true|false # if false don't get spectra
84
- def msrun(file, opts={})
85
- unless opts.key?(:spectra)
86
- opts[:spectra] = true
87
- end
88
-
89
- msrun_obj =
90
- if x = opts[:msrun]
91
- msrun_obj = x
92
- else
93
- MS::MSRun.new
94
- end
95
-
96
- doc = File.open(file) {|fh| ::Hpricot.XML(fh) }
97
- #if @version == '2.0'
98
- # # may not be necessary in hpricot!
99
- # #string = fix_bad_scan_tags(file)
100
- # #XML::Parser.string(string).parse
101
- #else
102
- # XML::Document.file(file)
103
- #end
104
- msrun_n = doc.at('msRun')
105
-
106
- ## HEADER
107
- scan_count = msrun_n['scanCount'].to_i
108
- msrun_obj.scan_count = scan_count
109
- scans_by_num = Array.new(scan_count + 1)
110
-
111
- ## SPECTRUM
112
- parent = nil
113
- scans = Array.new( scan_count )
114
- scn_index = 0
115
-
116
- # we should be able to do this, but it's not working!!!
117
- #scan_n = msrun_n.find_first('scan')
118
- #while (scn_index < scan_count)
119
- get_spectra = opts[:spectra]
120
-
121
- msrun_n.each_child do |scan_n|
122
- p scan_n
123
- abort
124
-
125
- next unless scan_n.name == 'scan'
126
- scan = create_scan(scan_n, scans_by_num, get_spectra)
127
- scans[scn_index] = scan
128
- sc = scan_n.next
129
- scans_by_num[scan[0]] = scan
130
- scn_index += 1
131
- end
132
-
133
-
134
- ## update the scan's parents
135
- MS::MSRun.add_parent_scan(scans)
136
-
137
- # note that startTime and endTime are optional AND in >2.2 are dateTime
138
- # instead of duration types!, so we will just use scan times...
139
- # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
140
- # export. They give the start and end time in seconds, but they are
141
- # really minutes. All the more reason to use the first and last scans!
142
- msrun_obj.start_time = scans.first.time
143
- msrun_obj.end_time = scans.last.time
144
-
145
- msrun_obj.scans = scans
146
- end
147
-
148
- end
149
-
150
-
151
-
152
- =begin
153
- ## THIS IS THE SAX PARSER VERSION. IT NEEDS A BIT OF BRUSH UP AND IT WOULD
154
- ## WORK. I THINK THE default guy is probably faster
155
-
156
- def msrun(file, msrun_obj)
157
- # Figure out where the first scan is at in the file:
158
- pos_after_first_scan = nil
159
- File.open(file) do |fh|
160
- fh.each do |line|
161
- if line =~ /<scan/
162
- pos_after_first_scan = fh.pos
163
- end
164
- end
165
- end
166
-
167
- # Get only the header:
168
- header_string = IO.read(file, pos_after_first_scan)
169
-
170
- @msrun_obj = msrun_obj
171
- # Parse out the header info:
172
- parser = XML::SaxParser.new
173
- parser.string = header_string
174
- parser.on_start_element do |name, attrs|
175
- if name == 'msRun'
176
- @msrun_obj.scan_count = attrs['scanCount'].to_i
177
- @msrun_obj.start_time = attrs['startTime'][2...-1].to_f
178
- @msrun_obj.end_time = attrs['endTime'][2...-1].to_f
179
- end
180
- end
181
- parser.parse
182
-
183
-
184
- # Parse the scans out:
185
- scan_st = 'scan'
186
- prec_st = 'precursorMz'
187
- peaks_st = 'peaks'
188
- prec_inten_st = 'precursorIntensity'
189
- precision_st = 'precision'
190
-
191
- #parser = MS::Parser::MzXML::Hpricot::SaxParser::MSRun.new
192
- parser = XML::SaxParser.new
193
- parser.filename = file
194
- parser.on_start_document do
195
- @scans = []
196
- @current_scan = nil
197
- @get_peaks = false
198
- @get_prec_mz = false
199
- end
200
-
201
- parser.on_characters do |chars|
202
- if @get_peaks
203
- @get_peaks << chars
204
- elsif @get_prec_mz
205
- @get_prec_mz << chars
206
- end
207
- end
208
-
209
- parser.on_end_element do |el|
210
- case el
211
- when 'peaks'
212
- @current_scan.spectrum = Spectrum.from_base64_peaks(@get_peaks, @precision, true)
213
- @get_peaks = false
214
- when 'precursorMz'
215
- @current_scan[5] = [Precursor.new([@get_prec_mz.to_f])]
216
- @get_prec_mz = false
217
- end
218
- end
219
-
220
- parser.on_start_element do |name, attr_hash|
221
- case name
222
- when scan_st
223
- @current_scan = new_scan_from_hash(attr_hash)
224
- sz = @scans.size
225
- @scans << @current_scan
226
- when prec_st
227
- @current_scan[5].first[1] = attr_hash[prec_inten_st].to_f
228
- @get_prec_mz = ''
229
- when peaks_st
230
- @precision = attr_hash[precision_st].to_i
231
- case @version[0,1].to_ip
232
- when 3
233
- if ch['pairOrder'] != 'm/z-int' # only version 3.0 has others
234
- abort "cannot yet read anything but 'm/z-int' pair order"
235
- end
236
- end
237
- @get_peaks = ''
238
- end
239
- end
240
- parser.parse
241
-
242
- @msrun_obj.scans = @scans
243
- @msrun_obj.scans.each_with_index do |sc,i|
244
- if sc.spectrum.mz == nil
245
- abort "INDEX: #{i}"
246
- end
247
- end
248
- @msrun_obj
249
- end
250
- =end
251
-
252
-
253
-
@@ -1,19 +0,0 @@
1
-
2
- require 'ms/parser/mzxml/dom'
3
-
4
- class MS::Parser::MzXML::LibXML < MS::Parser::MzXML::DOM
5
- def get_root_node_from_string(string)
6
- XML::Parser.string(string).parse.root
7
- end
8
- def get_root_node_from_file(file)
9
- XML::Parser.filename(file).parse.root
10
- end
11
- def get_root_node_from_io(io)
12
- XML::Parser.io(io).parse.root
13
- end
14
-
15
- end
16
-
17
-
18
-
19
-
@@ -1,122 +0,0 @@
1
- require 'strscan'
2
-
3
- module MS::Parser::MzXML ; end
4
-
5
- class MS::Parser::MzXML::Regexp
6
- @@first_scan_regexp = /<scan /o
7
- include MS::Parser::MzXML
8
-
9
- def initialize(method=:msrun, version='1.0')
10
- @method = method
11
- @version = version
12
- end
13
-
14
- def parse(file)
15
- send(@method, file)
16
- end
17
-
18
- # returns a MS::MsRun Object
19
- def msrun(file)
20
- fh = File.open(file)
21
- get_header(fh)
22
-
23
- fh.close
24
- end
25
-
26
- #def msrun(file, opts={})
27
- #end
28
-
29
- @@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo
30
- def self.precursor_mz_and_intensity_by_scan(file)
31
- prec_re = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
32
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures.reverse}
33
- end
34
-
35
- # (array will likely start at 1!)
36
- def self.by_scan_num(file, regex)
37
- arr = []
38
- File.open(file) do |fh|
39
- string = fh.read
40
- matches = string.scan(@@scan_re)
41
- matches.each do |matched|
42
- if inner_match = regex.match(matched[1])
43
- index = matched[0].to_i
44
- arr[index] = yield(inner_match)
45
- end
46
- end
47
- end
48
- arr
49
- end
50
-
51
- # Returns array where array[scan_num] = precursorMz
52
- # Parent scans armme not arrayed
53
- # Values are strings. Array index likely starts at 1!
54
- # @TODO: replace the use of a yield block
55
- def self.precursor_mz_by_scan(file)
56
- prec_re = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
57
- self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures[0]}
58
- end
59
-
60
- end
61
-
62
-
63
- class MS::Parser::MzXML::Regexp::MsRun
64
- @@scan_count_regexp = /scanCount="(\d+)"/o
65
- @@start_time_regexp = /startTime="PT([\d\.]+)S"/o
66
- @@end_time_regexp = /endTime="PT([\d\.]+)S"/o
67
- @@first_scan_regexp = /<scan /
68
-
69
- def initialize(version='1.0')
70
- @version = version
71
- end
72
-
73
- def parse(io, msrun_object)
74
- atts = {}
75
- [:scan_count, :start_time, :end_time].zip(get_header_info(io)) {|v,k| atts[k] = v }
76
- ###
77
- # HERE <------------------------------------
78
- abort "NEED TO FINISH WRITING SCANS EXTRACTOR!"
79
- get_scans(io)
80
- # HERE <------------------------------------
81
-
82
- # set the attributes
83
- atts.each do |k,v|
84
- msrun_object.send(k,v)
85
- end
86
- # need to fill in the scan_counts array
87
- end
88
-
89
- # assumes the attributes are each on a line
90
- def get_scans(io)
91
- io.each do |line|
92
- end
93
- end
94
-
95
- # returns [total_num_scans, start_time, end_time] and positions the handle
96
- # so that the next 'gets' will call a scan
97
- def get_header_info(io)
98
- scan_count = nil
99
- start_time = nil
100
- end_time = nil
101
-
102
- previous_position = nil
103
- io.each do |line|
104
- if line =~ @@scan_count_regexp
105
- scan_count = $1.dup
106
- end
107
- if line =~ @@start_time_regexp
108
- start_time = $1.dup
109
- end
110
- if line =~ @@end_time_regexp
111
- end_time = $1.dup
112
- end
113
- if line =~ @@first_scan_regexp
114
- io.pos = previous_position
115
- break
116
- end
117
- previous_position = io.pos
118
- end
119
- [scan_count, start_time, end_time]
120
- end
121
-
122
- end
@@ -1,72 +0,0 @@
1
- require 'rexml/document'
2
- require 'rexml/streamlistener'
3
-
4
- module MS::Parser::MzXML::REXMLStreamListener; end
5
- class MS::Parser::MzXML::REXMLStreamListener::PrecMzByNum; end
6
-
7
- module REXMLStreamListenerHelper
8
- def parse_and_report(file, const, report_method=:report)
9
- listener = self.const_get(const).new
10
- File.open(file) do |fh|
11
- REXML::Document.parse_stream(fh, listener)
12
- end
13
- listener.send(report_method)
14
- end
15
- end
16
-
17
- class MS::Parser::MzXML::REXML
18
- include MS::Parser::MzXML
19
-
20
- def initialize(version='1.0', method=:msrun)
21
- @version = version
22
- @method = parse_type
23
- end
24
-
25
- # returns an array indexed by scan_num that gives the precursor_mz
26
- def precursor_mz_by_scan(file, opts={})
27
- parse_and_report(file, PrecMzByNum)
28
- end
29
-
30
- end
31
-
32
-
33
-
34
-
35
- # for REXML
36
- class MS::Parser::MzXML::REXML::PrecMzByNum
37
- include REXML::StreamListener
38
-
39
- attr_accessor :prec_mz
40
- alias_method :report, :prec_mz
41
-
42
- def initialize
43
- @prec_mz = []
44
- @scan_num = nil
45
- @get_data = false
46
- end
47
-
48
- def tag_start(name,attrs)
49
- if name == "scan"
50
- @scan_num = attrs["num"].to_i
51
- elsif name == "precursorMz"
52
- @get_data = true
53
- end
54
- end
55
-
56
- def tag_end(name)
57
- if name == "precursorMz"
58
- @get_data = false
59
- end
60
- end
61
-
62
- def text(txt)
63
- if @get_data
64
- @prec_mz[@scan_num] = txt
65
- end
66
- end
67
-
68
- end
69
-
70
-
71
-
72
-
@@ -1,248 +0,0 @@
1
- require 'xmlparser_wrapper'
2
-
3
- # this is the wrapper class
4
- class MS::Parser::MzXML::XMLParser
5
- include XMLStyleParser
6
- include MS::Parser::MzXML
7
- include XMLParserWrapper
8
-
9
- def initialize(parse_type=:msrun, version='1.0')
10
- @method = parse_type
11
- @version = version
12
- end
13
-
14
- # returns: [times_arr, [m/z,inten,m/z,inten...]]
15
- # where times are time strings (in seconds)
16
- def times_and_spectra(file, opts={})
17
- parse_and_report(file, 'TimesAndSpectra')
18
- end
19
-
20
-
21
- ## IN PROGRESS ...
22
- # opts is actually the msrun object that will be fleshed out in the parsing
23
- def msrun(file, opts={})
24
- p opts
25
- fh = File.open(file)
26
- reply = parse_and_report_io(fh, 'MsRunHeader')
27
- p reply
28
- abort
29
- fh.close
30
- end
31
-
32
- def prec_mz_by_scan_num(file, opts={})
33
- end
34
-
35
- # could easily do this for all these guys
36
- #def method_missing(*args)
37
- # method = args.shift
38
- # parse_and_report(
39
- #end
40
-
41
- end
42
-
43
- class MS::Parser::MzXML::XMLParser::MsRunHeader < XMLParser
44
- def initialize(version='1.0')
45
- @version = version
46
- @atts = []
47
- end
48
-
49
- def startElement(name,attrs)
50
- case name
51
- when 'msRun'
52
- @atts = attrs.values_at(%w(scanCount startTime endTime))
53
- end
54
- end
55
-
56
- def endElement(name)
57
- if name == 'dataProcessing'
58
- done
59
- reset
60
- end
61
- end
62
- end
63
-
64
- class MS::Parser::MzXML::XMLParser::Spectrum < XMLParser
65
- @@scan_atts = %w(num msLevel retentionTime startMz endMz)
66
- @@precursor_mz_atts = %w(precursorIntensity)
67
-
68
-
69
- def initialize(version='1.0')
70
- @version = version
71
- @spectrum = []
72
- @current_scan = nil
73
- end
74
-
75
- def report
76
- @spectrum
77
- end
78
-
79
- def startElement(name,attrs)
80
- if name == 'scan'
81
- vals = attrs.values_at(@@scan_atts)
82
- vals[2] = vals[2][2...-1].to_f #remove PT and trailing S
83
- [0, 1].each do |i| vals[i] = vals[i].to_i end # num and ms_level
84
- [3, 4].each do |i| vals[i] = vals[i].to_f end # start_mz and end_mz
85
- @current_scan = MS::Scan.new(vals)
86
- elsif name == 'precursorMz'
87
- # 5, 6, 7 are the scans indices for prec_mz prec_inten and parent
88
- @current_scan[6] = attrs['precursorIntensity'].to_f
89
- @current_scan[5] = ''
90
- @get_precursor_mz = true
91
- elsif name == 'peaks'
92
- @precision = attrs['precision'].to_i
93
- @get_peaks = true
94
- @current_peaks_string = ''
95
- end
96
- end
97
-
98
- def endElement(name)
99
- if name == 'peaks'
100
- @get_peaks = false
101
- @spectrum << Spectrum.new(@current_peaks_string, @precision)
102
- @spectrum.context = @current_scan
103
- elsif name == 'precursorMz'
104
- @current_scan[5] = @current_scan[5].to_f
105
- @get_precursor_mz = false
106
- end
107
- end
108
-
109
- def character(data)
110
- if @get_peaks
111
- @current_peaks_string << data
112
- elsif @get_precursor_mz
113
- @current_scan[5] << data
114
- end
115
- end
116
-
117
- end
118
-
119
-
120
-
121
-
122
- class MS::Parser::MzXML::XMLParser::PrecMzByNum < XMLParser
123
- @scan_num = nil
124
- @get_data = false
125
-
126
- attr_accessor :prec_mz
127
- alias_method :report, :prec_mz
128
-
129
- def initialize
130
- @prec_mz = []
131
- end
132
-
133
- def startElement(name,attrs)
134
- if name == "scan"
135
- @scan_num = attrs["num"].to_i
136
- elsif name == "precursorMz"
137
- @current_prec_mz = ""
138
- @get_data = true
139
- end
140
- end
141
-
142
- def endElement(name)
143
- if name == "precursorMz"
144
- @get_data = false
145
- @prec_mz[@scan_num] = @current_prec_mz.to_f
146
- end
147
- end
148
-
149
- def character(data)
150
- if @get_data
151
- @current_prec_mz << data
152
- end
153
- end
154
-
155
- end
156
-
157
-
158
- =begin
159
-
160
-
161
- # Returns parallel arrays (times, spectra) where each spectra is an array
162
- # containing alternating mz and intensity (MS1 scans only)
163
- # and times are strings with the time in seconds
164
- class MS::Parser::MzXML::XMLParser::TimesAndSpectra < XMLParser
165
- include MS::Parser::MzXML
166
- @@get_data = false
167
- @@get_peaks = false
168
- @@precision = 32 # @TODO: set dynamic
169
-
170
- attr_accessor :times, :spectra
171
- def times_and_spectra
172
- [@times, @spectra]
173
- end
174
-
175
- alias_method :report, :times_and_spectra
176
-
177
- def initialize(ms_level=1)
178
- @ms_level = "#{ms_level}"
179
- @times = []
180
- @spectra = []
181
- end
182
-
183
- def startElement(name,attrs)
184
- if name == "scan" && attrs["msLevel"] == @ms_level
185
- @times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
186
- @@get_peaks = true
187
- elsif name == "peaks" && @@get_peaks
188
- @@get_data = true
189
- @data = ""
190
- end
191
- end
192
-
193
- def character(data)
194
- if @@get_data
195
- @data << data
196
- end
197
- end
198
-
199
- def endElement(name)
200
- if name == "peaks" && @@get_peaks
201
- @spectra << base64_peaks_to_array(@data, @@precision)
202
- @@get_data = false
203
- @@get_peaks = false
204
- end
205
- end
206
-
207
- end
208
-
209
-
210
- class MS::Parser::MzXML::XMLParser::TimeMzIntenIndexer < XMLParser
211
-
212
- @@scan_num = nil
213
- @@get_data = false
214
-
215
- attr_accessor :scans_by_num
216
- alias_method :report, :scans_by_num
217
-
218
- def initialize
219
- @current_scan = nil
220
- @scans_by_num = []
221
- end
222
-
223
- def startElement(name,attrs)
224
- if name == "scan"
225
- num = attrs["num"].to_i
226
- @current_scan = MS::Scan.new(num, attrs["msLevel"].to_i, attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f)
227
- scans_by_num[num] = @current_scan
228
- elsif name == "precursorMz"
229
- @current_scan.prec_inten = attrs["precursorIntensity"].to_f
230
- @@get_data = true
231
- end
232
- end
233
-
234
- def endElement(name)
235
- if name == "precursorMz"
236
- @@get_data = false
237
- end
238
- end
239
-
240
- def character(data)
241
- if @@get_data
242
- @current_scan.prec_mz = data
243
- end
244
- end
245
-
246
- end
247
-
248
- =end