mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,108 +0,0 @@
1
- require 'ms/scan'
2
- require 'ms/parser'
3
-
4
# An index of the scans of one MS run, addressable by scan number.
#
# The index is built either from a .mzXML file or from a .timeIndex file
# (see #set_from_index_file for the row layout of the latter).
class MS::MSRunIndex
  # Array whose position is the scan number; positions with no scan are nil.
  attr_accessor :scans_by_num
  # Base name of the source file (NO leading path, NO extensions).
  attr_reader :basename_noext

  # Takes a .mzXML file or .timeIndex file (currently) and creates an index
  # of scans from it.  With no file, an empty index is created.
  #
  # Raises ArgumentError when the file has any other extension.
  def initialize(file=nil)
    @scans_by_num = []
    return unless file
    ext = File.extname(file)
    case ext
    when '.mzXML'     then set_from_mzxml(file)
    when '.timeIndex' then set_from_index_file(file)
    else
      raise ArgumentError, "#{self.class}.new doesn't recognize files of extension: #{ext}"
    end
  end

  # Identifies and removes .mzXML, .mzXML.timeIndex and .timeIndex;
  # otherwise removes exactly one (trailing) extension.  Any leading path is
  # removed as well.
  def basename_noext=(filename)
    ext = File.extname(filename)
    # File.basename only strips the suffix at the END of the name, so a
    # name such as "a.d.data.d" keeps its inner ".d" occurrences intact.
    stem = File.basename(filename, ext)
    # "run.mzXML.timeIndex" carries two extensions and both must go
    if ext == '.timeIndex' && File.extname(stem) == '.mzXML'
      stem = File.basename(stem, '.mzXML')
    end
    @basename_noext = stem
  end

  # index_file has one row for each scan:
  #   ms_level scan_num time [prec_mz prec_inten]
  # Rows without a digit and rows starting with '#' are skipped.
  # Also consider getting this data directly from the mzXML file
  # via the MS::MzXML::Parser.get_msrun_index command.
  #
  # A nil index_file leaves the index empty.
  def set_from_index_file(index_file)
    @scans_by_num = []
    if index_file
      self.basename_noext = index_file
      # File.foreach closes the file once iteration is done
      File.foreach(index_file) do |line|
        next if line !~ /\d/ || line =~ /^#/
        ms_level, num, time, prec_mz, prec_inten = line.split(" ")
        scan = MS::Scan.new(num.to_i, ms_level.to_i, time.to_f)
        if scan.ms_level > 1
          scan.prec_mz = prec_mz.to_f
          scan.prec_inten = prec_inten.to_f
        end
        @scans_by_num[scan.num] = scan
      end
    end
    MS::Scan.add_parent_scan(@scans_by_num)
  end

  # Fills the index by parsing the given .mzXML file with MS::Parser
  # (parse type :scans_by_num).
  def set_from_mzxml(file)
    self.basename_noext = file
    @scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
  end

  # Writes the index to filename, one line per scan:
  #   ms_level scan_num time [prec_mz prec_intensity]
  # (the exact line is whatever the scan's to_index_file_string returns).
  # Scan numbers with no scan are skipped.
  def to_index_file(filename)
    rows = @scans_by_num.compact.map { |scan| scan.to_index_file_string }
    File.open(filename, "w") { |fh| fh.print rows.join("\n") }
  end

  # Returns an array, indexed by scan number, holding the time of the most
  # recent ms_level 1 scan at or before that scan number (so an ms_level 1
  # scan maps to its own time).  Scan numbers with no scan map to nil, as do
  # scans that precede the first ms_level 1 scan.
  def parent_times_by_scan_num
    by_num = Array.new(@scans_by_num.size)
    parent_time = nil
    @scans_by_num.each_with_index do |scan, i|
      next unless scan   # the index is sparse (e.g. no scan number 0)
      parent_time = scan.time if scan.ms_level == 1
      by_num[i] = parent_time
    end
    by_num
  end

end
106
-
107
-
108
-
@@ -1,67 +0,0 @@
1
- require 'ms/parser/mzdata/dom'
2
-
3
# mzData DOM parser that obtains its document tree from the AXML library.
class MS::Parser::MzData::AXML < MS::Parser::MzData::DOM
  # Hands the path +file+ to ::AXML.parse_file and returns its result.
  def get_root_node_from_file(file) ; ::AXML.parse_file(file) ; end

  # Hands the stream +io+ to ::AXML.parse and returns its result.
  def get_root_node_from_io(io) ; ::AXML.parse(io) ; end
end
11
-
12
# Variant of the AXML-backed mzData parser that builds its tree through
# ::AXML::LazyData instead of plain ::AXML.
class MS::Parser::MzData::AXML::LazyData < MS::Parser::MzData::AXML
  # Hands +string+ to ::AXML::LazyData.parse and returns its result.
  def get_root_node_from_string(string) ; ::AXML::LazyData.parse(string) ; end

  # Hands the path +file+ to ::AXML::LazyData.parse_file and returns its result.
  def get_root_node_from_file(file) ; ::AXML::LazyData.parse_file(file) ; end

  # Hands the stream +io+ to ::AXML::LazyData.parse and returns its result.
  def get_root_node_from_io(io) ; ::AXML::LazyData.parse(io) ; end
end
23
-
24
# AXML front end that parses with AXML::XMLParser::LazyData.
class AXML::LazyData < AXML
  class << self
    # Runs a fresh ::AXML::XMLParser::LazyData over +stream+ and returns
    # the parser's root: the root node (as Element) or nodes (as Array).
    def parse(stream)
      lazy_parser = ::AXML::XMLParser::LazyData.new
      lazy_parser.parse(stream)
      lazy_parser.root
    end
  end
end
32
-
33
# This parser records WHERE the data (peaks) text lives in the input rather
# than keeping the text itself.
# The content of a 'data' node is an array whose first member is the start
# index and whose last member is the number of bytes.  All other members
# should be ignored.
class AXML::XMLParser::LazyData < ::AXML::XMLParser

  # Opens a new element below the current one and makes it current.
  # 'data' elements get an Array as text (to collect byte positions); every
  # other element gets an empty String.
  def startElement(name, attributes)
    text = (name == 'data') ? [] : ''
    new_el = ::AXML::El.new(@cur, name, attributes, text, [])
    # add the new node to the previous parent node
    @cur.add_node(new_el)
    # notice the change in @cur node
    @cur = new_el
  end

  # For array-backed text the current byteIndex is stored; otherwise the
  # character data itself is appended.
  def character(data)
    text = @cur.text
    text << (text.is_a?(Array) ? byteIndex : data)
  end

  # Closes the current element.  For array-backed text the distance from the
  # first recorded index to the current byteIndex is appended as the final
  # member before moving back up to the parent.
  def endElement(name)
    text = @cur.text
    text << (byteIndex - text.first) if text.is_a?(Array)
    @cur = @cur.parent
  end

end
@@ -1,175 +0,0 @@
1
- require 'xml_style_parser'
2
- require 'ms/spectrum'
3
- require 'ms/scan'
4
-
5
module MS::Parser::MzData ; end

# DOM style parser for mzData files.  Subclasses supply
# get_root_node_from_io (and friends) for a concrete XML library.
class MS::Parser::MzData::DOM
  include XMLStyleParser
  include MS::Parser::MzData

  def initialize(parse_type=:msrun, version='1.0')
    @method = parse_type
    @version = version
  end

  # true if there is a node <dataProcessing><software> whose <name> is
  # 'Bioworks Browser' and whose <version> is '3.3'; otherwise false.
  # Any error while walking the nodes (e.g. a missing node) also gives false.
  def is_bioworks33?(description_node)
    software_node = description_node.find_first('child::dataProcessing').find_first('child::software')
    name = software_node.find_first('child::name').content
    version = software_node.find_first('child::version').content
    name == 'Bioworks Browser' && version == '3.3'
  rescue
    false
  end

  # Parses +file+ (a filename String or an already opened io) into an msrun
  # object: sets its scans, scan_count, start_time and end_time.
  #
  # Scan positions filled here:
  #   0    1        2              3        4      5          6
  #   num  msLevel  retentionTime  startMz  endMz  precursor  spectrum
  #
  # OPTIONS:
  #   :msrun => MSRun  # use this object instead of creating one
  #   :lazy  => :no_spectra (spectra are not read), :string, :io or :not
  #
  # A file opened here is always closed again before returning, also when
  # parsing raises.  Returns nil.
  def msrun(file, opts={})
    msrun_obj = opts[:msrun] || MS::MSRun.new
    # should ensure that parsing is not counting spaces...

    filename = file.is_a?(String) ? file : nil
    io = filename ? File.open(filename) : file
    begin
      root = get_root_node_from_io(io)

      description = root.find_first('child::description')
      bioworks33 = is_bioworks33?(description)
      spectrum_list = description.next

      # bioworks 33 gives incorrect scan count
      stated_num_scans = spectrum_list['count'].to_i

      id_to_scan_hash = {}
      scans = []

      # if I move from node to node, it means I've checked that it's a
      # sequence and that the elements are req'd
      if spectrum_list.child?
        spectrum_n = spectrum_list.child
        loop do
          scan = MS::Scan.new(9)
          id = spectrum_n["id"].to_i
          id_to_scan_hash[id] = scan
          spec_desc_n = spectrum_n.child       # required in sequence
          spec_settings_n = spec_desc_n.child  # required in sequence
          if acq_n = spec_settings_n.find_first('descendant::acquisition')
            scan[0] = acq_n['acqNumber'].to_i
          else
            scan[0] = id
          end
          spec_inst_n = spec_settings_n.find_first('child::spectrumInstrument')
          scan[1] = spec_inst_n['msLevel'].to_i

          # we could use a scan_count, but in bioworks 33, we can't trust
          # the scan count!  So, we just collect them
          scans << scan

          scan[3] = spec_inst_n['mzRangeStart'].to_f
          scan[4] = spec_inst_n['mzRangeStop'].to_f
          spec_inst_n.find('child::cvParam').each do |cv_param|
            if cv_param['name'] == 'TimeInMinutes'
              scan[2] = cv_param['value'].to_f * 60 #convert to seconds
            end
          end

          scan[5] =
            if scan[1] > 1 # precursormz info
              precursor_from_node(spec_settings_n.next, id_to_scan_hash, bioworks33)
            else           # no precursors
              nil
            end

          unless opts[:lazy] == :no_spectra
            spectrum = spectrum_from_node(spec_desc_n, opts[:lazy], io)
            # an unrecognized :lazy value yields no spectrum; leave the
            # scan's spectrum slot untouched in that case
            scan[6] = spectrum unless spectrum.nil?
          end

          # set up the next loop
          break unless spectrum_n = spectrum_n.next
        end
      end

      if bioworks33
        MS::MSRun.add_parent_scan(scans, opts[:lazy] == :not)
      end
      msrun_obj.scans = scans
      msrun_obj.scan_count = scans.size
      unless bioworks33 # we know the scan count is off here
        if msrun_obj.scan_count != stated_num_scans
          warn "num collected scans (#{scans.size}) does not agree with stated num scans (#{stated_num_scans})!"
        end
      end
      # an empty spectrum list has no first/last scan to take times from
      unless scans.empty?
        msrun_obj.start_time = msrun_obj.scans.first.time
        msrun_obj.end_time = msrun_obj.scans.last.time
      end
    ensure
      io.close if filename
    end
    nil
  end

  private

  # Builds the precursor for a scan from its precursor list node.
  # Precursor positions filled here: 0 = mz, 1 = intensity, 2 = parent scan,
  # 3 = charge states.  For bioworks 3.3 files the parent (and therefore
  # the intensity) is not set since bioworks33 points to the wrong scan!
  #
  # Raises RuntimeError unless the list states exactly one precursor.
  def precursor_from_node(prec_list_n, id_to_scan_hash, bioworks33)
    raise RuntimeError, "MSRun objects can only accept 1 precursor" if prec_list_n['count'] != '1'
    prec_n = prec_list_n.find_first('child::precursor')
    prec = MS::Precursor.new
    unless bioworks33
      prec[2] = id_to_scan_hash[prec_n['spectrumRef'].to_i]
    end
    charges = []
    prec_n.find('descendant::cvParam').each do |cv_param_n|
      case cv_param_n['name']
      when 'MassToChargeRatio'
        prec[0] = cv_param_n['value'].to_f
        # find the prec intensity
        unless bioworks33
          prec[1] = prec[2].spectrum.intensity_at_mz(prec[0])
        end
      when 'ChargeState'
        charges << cv_param_n['value'].to_i
      end
    end
    prec[3] = charges
    prec
  end

  # Builds the spectrum for the spectrum whose description node is
  # spec_desc_n, according to lazy (:string, :io or :not).  Returns nil for
  # any other value of lazy.
  #
  # Raises RuntimeError when no mzArrayBinary sibling follows spec_desc_n.
  def spectrum_from_node(spec_desc_n, lazy, io)
    # walk the following siblings until the mzArrayBinary node shows up
    mz_array_bin_n = spec_desc_n
    loop do
      mz_array_bin_n = mz_array_bin_n.next
      raise RuntimeError, "no mzArrayBinary node follows the spectrum description" unless mz_array_bin_n
      break if mz_array_bin_n.name == 'mzArrayBinary'
    end
    mz_data_n = mz_array_bin_n.child
    inten_array_bin_n = mz_array_bin_n.next
    inten_data_n = inten_array_bin_n.child
    case lazy
    when :string
      MS::Spectrum::LazyString.from_base64_pair(mz_data_n.content, mz_data_n['precision'].to_i, network_order?(mz_data_n), inten_data_n.content, inten_data_n['precision'].to_i, network_order?(inten_data_n))
    when :io
      # with a LazyData parser the content is an array of byte positions
      mz_data_n_content = mz_data_n.content
      i_data_n_content = inten_data_n.content
      MS::Spectrum::LazyIO.new(io, mz_data_n_content.first, mz_data_n_content.last, mz_data_n['precision'].to_i, network_order?(mz_data_n), i_data_n_content.first, i_data_n_content.last, inten_data_n['precision'].to_i, network_order?(inten_data_n))
    when :not
      mz = MS::Spectrum.base64_to_array(mz_data_n.content, mz_data_n['precision'].to_i, network_order?(mz_data_n))
      inten = MS::Spectrum.base64_to_array(inten_data_n.content, inten_data_n['precision'].to_i, network_order?(inten_data_n))
      MS::Spectrum.new(mz, inten)
    end
  end

  # false when the data node states endian='little', true otherwise
  def network_order?(data_n)
    data_n['endian'] != 'little'
  end

end
173
-
174
-
175
-
@@ -1,7 +0,0 @@
1
-
2
# mzData DOM parser that obtains its document tree through XML::Document.
class MS::Parser::MzData::LibXML < MS::Parser::MzData::DOM
  # Loads the document at path +file+ and returns its root node.
  def get_root_node_from_file(file)
    document = XML::Document.file(file)
    document.root
  end
end
7
-
@@ -1,31 +0,0 @@
1
- require 'ms/msrun'
2
-
3
module MS; end

# Entry point for the mzData parsers found under Base_dir_for_parsers.
module MS::Parser::MzData
  Base_dir_for_parsers = 'ms/parser/mzdata'

  # inherits XMLStyleParser and version
  include MS::Parser
  include XMLStyleParser

  # Returns an instance of the specific parser class picked by
  # XMLStyleParser.choose_parser for this module (parser files are required
  # from Base_dir_for_parsers first).  With opts[:lazy] == :io the
  # 'LazyData' special subclass is requested.
  def self.new(parse_type=:msrun, version='1.05', opts={})
    special_subclass = (opts[:lazy] == :io) ? 'LazyData' : nil

    @version = version
    @method = parse_type
    XMLStyleParser.require_parse_files(Base_dir_for_parsers)
    XMLStyleParser.choose_parser(self, parse_type, special_subclass).new(parse_type, version)
  end

end
30
-
31
-
@@ -1,70 +0,0 @@
1
- require 'ms/parser/mzxml/dom'
2
-
3
# mzXML DOM parser that obtains its document tree from the AXML library.
class MS::Parser::MzXML::AXML < MS::Parser::MzXML::DOM
  # Hands +string+ to ::AXML.parse and returns its result.
  def get_root_node_from_string(string) ; ::AXML.parse(string) ; end

  # Hands the path +file+ to ::AXML.parse_file and returns its result.
  def get_root_node_from_file(file) ; ::AXML.parse_file(file) ; end

  # Hands the stream +io+ to ::AXML.parse and returns its result.
  def get_root_node_from_io(io) ; ::AXML.parse(io) ; end
end
14
-
15
# Variant of the AXML-backed mzXML parser that builds its tree through
# ::AXML::LazyPeaks instead of plain ::AXML.
class MS::Parser::MzXML::AXML::LazyPeaks < MS::Parser::MzXML::AXML
  # Hands +string+ to ::AXML::LazyPeaks.parse and returns its result.
  def get_root_node_from_string(string) ; ::AXML::LazyPeaks.parse(string) ; end

  # Hands the path +file+ to ::AXML::LazyPeaks.parse_file and returns its result.
  def get_root_node_from_file(file) ; ::AXML::LazyPeaks.parse_file(file) ; end

  # Hands the stream +io+ to ::AXML::LazyPeaks.parse and returns its result.
  def get_root_node_from_io(io) ; ::AXML::LazyPeaks.parse(io) ; end
end
26
-
27
# AXML front end that parses with AXML::XMLParser::LazyPeaks.
class AXML::LazyPeaks < AXML
  class << self
    # Runs a fresh ::AXML::XMLParser::LazyPeaks over +stream+ and returns
    # the parser's root: the root node (as Element) or nodes (as Array).
    def parse(stream)
      lazy_parser = ::AXML::XMLParser::LazyPeaks.new
      lazy_parser.parse(stream)
      lazy_parser.root
    end
  end
end
35
-
36
# This parser records WHERE the peaks text lives in the input rather than
# keeping the text itself.
# The content of a 'peaks' node is an array whose first member is the start
# index and whose last member is the number of bytes.  All other members
# should be ignored.
class AXML::XMLParser::LazyPeaks < ::AXML::XMLParser

  # Opens a new element below the current one and makes it current.
  # 'peaks' elements get an Array as text (to collect byte positions); every
  # other element gets an empty String.
  def startElement(name, attributes)
    text = (name == 'peaks') ? [] : ''
    new_el = ::AXML::El.new(@cur, name, attributes, text, [])
    # add the new node to the previous parent node
    @cur.add_node(new_el)
    # notice the change in @cur node
    @cur = new_el
  end

  # For array-backed text the current byteIndex is stored; otherwise the
  # character data itself is appended.
  def character(data)
    text = @cur.text
    text << (text.is_a?(Array) ? byteIndex : data)
  end

  # Closes the current element.  For array-backed text the distance from the
  # first recorded index to the current byteIndex is appended as the final
  # member before moving back up to the parent.
  def endElement(name)
    text = @cur.text
    text << (byteIndex - text.first) if text.is_a?(Array)
    @cur = @cur.parent
  end

end
@@ -1,182 +0,0 @@
1
- require 'xml_style_parser'
2
- require 'ms/spectrum'
3
- require 'ms/scan'
4
- require 'ms/parser/mzxml'
5
- require 'tempfile'
6
-
7
-
8
# DOM style parser for mzXML files.  Subclasses supply
# get_root_node_from_io (and friends) for a concrete XML library.
#
# Scan positions filled by this parser:
#   0    1        2              3        4      5          6
#   num  msLevel  retentionTime  startMz  endMz  precursor  spectrum
class MS::Parser::MzXML::DOM
  include XMLStyleParser
  include MS::Parser::MzXML

  NetworkOrder = true

  def initialize(parse_type=:msrun, version='1.0')
    @method = parse_type
    @version = version
  end

  # Creates a scan from the attributes of a scan node: num, msLevel and,
  # when present, retentionTime and startMz/endMz.
  def new_scan_from_hash(node)
    scan = MS::Scan.new # array class creates one with 9 positions
    scan[0] = node['num'].to_i
    scan[1] = node['msLevel'].to_i
    if retention_time = node['retentionTime']
      # drops the first two characters and the last one before converting
      # (presumably a duration such as "PT12.3S" -- confirm against the schema)
      scan[2] = retention_time[2...-1].to_f
    end
    if start_mz = node['startMz']
      scan[3] = start_mz.to_f
      scan[4] = node['endMz'].to_f
    end
    scan
  end

  # Creates a scan for each node, storing it in scans (starting at position
  # scn_index) and in scans_by_num (at its scan number).  For versions
  # above '1.0' the child scan nodes of each scan are added recursively,
  # directly after their parent.  Returns the next free position in scans.
  def add_scan_nodes(nodes, scans, scn_index, scans_by_num, lazy, io)
    nodes.each do |scan_n|
      scan = create_scan(scan_n, scans_by_num, lazy, io)
      scans[scn_index] = scan
      scans_by_num[scan[0]] = scan
      scn_index += 1
      if @version > '1.0'
        new_nodes = scan_n.find('child::scan')
        if new_nodes.size > 0
          scn_index = add_scan_nodes(new_nodes, scans, scn_index, scans_by_num, lazy, io)
        end
      end
    end
    scn_index
  end

  # Takes a scan node and creates a scan object.
  # A precursor's parent scan is looked up in scans_by_num through the
  # precursorScanNum attribute (when given).
  # lazy must be a symbol from MS::MSRun.new (:no_spectra, :string, :io or
  # :not are handled here).
  #
  # Raises RuntimeError if the scan has more than one precursorMz node.
  def create_scan(scan_n, scans_by_num, lazy, io=nil)
    scan = new_scan_from_hash(scan_n)
    prec = nil
    scan_n.each do |node|
      case node.name
      when 'precursorMz'
        raise RuntimeError, "the msrun object can only handle one precursor!" unless prec.nil?
        prec = MS::Precursor.new
        prec[1] = node['precursorIntensity'].to_f
        prec[0] = node.content.to_f
        if parent_num = node['precursorScanNum']
          prec[2] = scans_by_num[parent_num.to_i]
        end
      when 'peaks'
        case lazy
        when :no_spectra
          next
        when :string
          scan[6] = MS::Spectrum::LazyString.from_base64_peaks(node.content, node['precision'].to_i)
        when :io
          # assumes that parsing was done with a LazyPeaks parser!
          nc = node.content
          scan[6] = MS::Spectrum::LazyIO.new(io, nc.first, nc.last, node['precision'].to_i, MS::Parser::MzXML::DOM::NetworkOrder)
        when :not
          scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
        end
      end
    end
    scan[5] = prec
    scan
  end

  # returns an array of msrun objects (not implemented)
  def msruns(file)
    raise NotImplementedError
  end

  # Parses +file+ (a filename String or an already opened io) into an msrun
  # object: sets its scan_count, scans, start_time and end_time.
  #
  # Right now cannot parse multiple runs out of an mzXML version 2 file
  # since this is built around a single run per file.
  # OPTIONS:
  #   :msrun => (an MSRun object) # use this object instead of creating one
  #   :lazy  => [See MS::MSRun for documentation]
  #
  # Raises NotImplementedError for more than one msRun node and RuntimeError
  # when no msRun node is found.
  def msrun(file, opts={})
    msrun_obj = opts[:msrun] || MS::MSRun.new

    filename = file.is_a?(String) ? file : nil # a filename
    io = filename ? File.open(filename) : file

    root =
      begin
        get_root_node_from_io(io)
      ensure
        # a file opened here can be closed now, also when parsing raised
        # NOTE(review): the (closed) io is still handed on for :lazy => :io;
        # presumably callers pass an open io in that case -- confirm
        io.close if filename
      end

    # right now we are only finding the first msRun (probably a rare case of
    # multiple runs in an mzXML file...)
    msrun_n =
      if @version >= '2.0'
        kids = root.children.select {|v| v.name == 'msRun' }
        raise(NotImplementedError, "one msrun per doc right now" ) if kids.size > 1
        kids.first
      else
        root
      end
    if msrun_n.nil?
      raise RuntimeError, "no msRun node found"
    end
    if msrun_n.name != 'msRun'
      raise RuntimeError, "extra node slipped in somehow"
    end

    ## HEADER
    scan_count = msrun_n['scanCount'].to_i
    msrun_obj.scan_count = scan_count
    scans_by_num = Array.new(scan_count + 1)

    ## SPECTRUM
    scans = Array.new( scan_count )
    lazy = opts[:lazy]

    if @version >= '3.0'
      warn '[version 3.0 parsing may fail if > 1 peak list per scan]'
      # note that mzXML version 3.0 *can* have more than one peak...
      # I'm not sure how to deal with that since I have one spectrum/scan
    end

    scan_nodes = msrun_n.find('child::scan')
    add_scan_nodes(scan_nodes, scans, 0, scans_by_num, lazy, io)

    ## update the scan's parents
    MS::MSRun.add_parent_scan(scans)

    # note that startTime and endTime are optional AND in >2.2 are dateTime
    # instead of duration types!, so we will just use scan times...
    # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
    # export.  They give the start and end time in seconds, but they are
    # really minutes.  All the more reason to use the first and last scans!
    msrun_obj.start_time = scans.first.time
    msrun_obj.end_time = scans.last.time

    msrun_obj.scans = scans

  end
end
181
-
182
-