mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,108 +0,0 @@
1
- require 'ms/scan'
2
- require 'ms/parser'
3
-
4
- class MS::MSRunIndex
5
- # basename_noext is the base name of the file (with NO extensions)
6
- attr_accessor :scans_by_num
7
- attr_reader :basename_noext
8
-
9
- # identifies and removes .mzXML .mzXML.timeIndex and .timeIndex
10
- # otherwise, removes one extension and that's the filename_noext
11
- # Also, removes any leading path
12
- def basename_noext=(filename)
13
- ext = File.extname(filename)
14
- basename = File.basename(filename)
15
- case ext
16
- when '.mzXML'
17
- @basename_noext = basename.gsub(/\.mzXML$/, "")
18
- when '.timeIndex'
19
- @basename_noext = basename.gsub(/\.timeIndex$/, "")
20
- if File.extname(@basename_noext) == ".mzXML"
21
- @basename_noext.gsub!(/\.mzXML$/, "")
22
- end
23
- else
24
- @basename_noext = basename.gsub(/#{Regexp.escape(ext)}/, "")
25
- end
26
- end
27
-
28
- # index_file has one row for each scan:
29
- # ms_level scan_num time [prec_mz prec_inten]
30
- # also consider getting this data directly from the mzXML file
31
- # via the MS::MzXML::Parser.get_msrun_index command
32
- def set_from_index_file(index_file)
33
- self.basename_noext = index_file
34
- @scans_by_num = []
35
- if index_file
36
- File.open(index_file).each do |line|
37
- next if line !~ /\d/ || line =~ /^#/
38
- line.chomp!
39
- arr = line.split(" ")
40
- scan = MS::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
41
- if scan.ms_level > 1
42
- scan.prec_mz = arr[3].to_f
43
- scan.prec_inten = arr[4].to_f
44
- end
45
- @scans_by_num[scan.num] = scan
46
- end
47
- end
48
- MS::Scan.add_parent_scan(@scans_by_num)
49
- end
50
-
51
- # Takes a .mzXML file or .timeIndex file (currently)
52
- # and creates an index of scans from it
53
- def initialize(file=nil)
54
- @scans_by_num = []
55
- if file
56
- ext = File.extname(file)
57
- case ext
58
- when '.mzXML'
59
- set_from_mzxml(file)
60
- when '.timeIndex'
61
- set_from_index_file(file)
62
- else
63
- raise ArgumentError, "#{self.class}.new doesn't recognize files of extension: #{ext}"
64
- end
65
- end
66
- end
67
-
68
-
69
- # returns a new
70
- def set_from_mzxml(file)
71
- self.basename_noext = file
72
- @scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
73
- end
74
-
75
- # writes the index to filename
76
- # each line:
77
- # ms_level scan_num time (if !ms_level=1) { prec_mz prec_intensity)
78
- def to_index_file(filename)
79
- strings = []
80
- @scans_by_num.each do |scan|
81
- if scan
82
- strings << scan.to_index_file_string
83
- end
84
- end
85
- File.open(filename, "w") do |fh|
86
- fh.print strings.join("\n")
87
- end
88
- end
89
-
90
- # returns an array of the times of the precursor scan's parent (not its own
91
- # acquisition time). The parent scan index will also retrieve the time of
92
- # the parent scan.
93
- def parent_times_by_scan_num
94
- by_num = []
95
- parent_time = nil
96
- @scans_by_num.each_with_index do |scan,i|
97
- if scan.ms_level == 1
98
- parent_time = scan.time
99
- end
100
- by_num[i] = parent_time
101
- end
102
- by_num
103
- end
104
-
105
- end
106
-
107
-
108
-
@@ -1,67 +0,0 @@
1
- require 'ms/parser/mzdata/dom'
2
-
3
- class MS::Parser::MzData::AXML < MS::Parser::MzData::DOM
4
- def get_root_node_from_file(file)
5
- ::AXML.parse_file(file)
6
- end
7
- def get_root_node_from_io(io)
8
- ::AXML.parse(io)
9
- end
10
- end
11
-
12
- class MS::Parser::MzData::AXML::LazyData < MS::Parser::MzData::AXML
13
- def get_root_node_from_string(string)
14
- ::AXML::LazyData.parse(string)
15
- end
16
- def get_root_node_from_file(file)
17
- ::AXML::LazyData.parse_file(file)
18
- end
19
- def get_root_node_from_io(io)
20
- ::AXML::LazyData.parse(io)
21
- end
22
- end
23
-
24
- class AXML::LazyData < AXML
25
- # Returns the root node (as Element) or nodes (as Array)
26
- def self.parse(stream)
27
- parser = ::AXML::XMLParser::LazyData.new
28
- parser.parse(stream)
29
- parser.root
30
- end
31
- end
32
-
33
- # This parser stores information about where the data (peaks) information is
34
- # in the file
35
- # The content of the data node is an array where the first member is the
36
- # start index and the last member is the number of bytes. All other members
37
- # should be ignored.
38
- class AXML::XMLParser::LazyData < ::AXML::XMLParser
39
-
40
- def startElement(name, attributes)
41
- text =
42
- if name == 'data' ; []
43
- else ; ''
44
- end
45
- new_el = ::AXML::El.new(@cur, name, attributes, text, [])
46
- # add the new node to the previous parent node
47
- @cur.add_node(new_el)
48
- # notice the change in @cur node
49
- @cur = new_el
50
- end
51
-
52
- def character(data)
53
- if @cur.text.is_a? Array
54
- @cur.text << byteIndex
55
- else
56
- @cur.text << data
57
- end
58
- end
59
-
60
- def endElement(name)
61
- if @cur.text.is_a? Array
62
- @cur.text << (byteIndex - @cur.text.first)
63
- end
64
- @cur = @cur.parent
65
- end
66
-
67
- end
@@ -1,175 +0,0 @@
1
- require 'xml_style_parser'
2
- require 'ms/spectrum'
3
- require 'ms/scan'
4
-
5
- module MS::Parser::MzData ; end
6
-
7
- class MS::Parser::MzData::DOM
8
- include XMLStyleParser
9
- include MS::Parser::MzData
10
-
11
- def initialize(parse_type=:msrun, version='1.0')
12
- @method = parse_type
13
- @version = version
14
- end
15
-
16
- # true if there is a node <dataProcessing><software><name>Bioworks Browser</...>
17
- # otherwise false
18
- def is_bioworks33?(description_node)
19
- begin
20
- software_node = description_node.find_first('child::dataProcessing').find_first('child::software')
21
- name = software_node.find_first('child::name').content
22
- version = software_node.find_first('child::version').content
23
- ((name == 'Bioworks Browser') and (version == '3.3'))
24
- rescue
25
- false
26
- end
27
- end
28
-
29
- # OPTIONS:
30
- # :msrun => MSRun # use this object instead of creating one
31
- def msrun(file, opts={})
32
- msrun_obj =
33
- if x = opts[:msrun]
34
- msrun_obj = x
35
- else
36
- MS::MSRun.new
37
- end
38
- # should ensure that parsing is not counting spaces...
39
-
40
- # a string we'd parse like this:
41
- # doc = XML::Parser.string(st).parse
42
-
43
- # WE NEED TO GET scan_count, start_time and end_time!!!!
44
- id_to_scan_hash = {}
45
-
46
- # 0 1 2 3 4 5 6
47
- # %w(num msLevel retentionTime startMz endMz precursor spectrum)
48
-
49
- io =
50
- if file.is_a? String
51
- filename = file
52
- File.open(file)
53
- else
54
- file
55
- end
56
- root = get_root_node_from_io(io)
57
-
58
-
59
- description = root.find_first('child::description')
60
- bioworks33 = is_bioworks33?(description)
61
- spectrum_list = description.next
62
-
63
- scans = []
64
-
65
- # bioworks 33 gives incorrect scan count
66
- stated_num_scans = spectrum_list['count'].to_i
67
-
68
- # if I move from node to node, it means I've checked that it's a sequence
69
- # and that the elements are req'd
70
- if spectrum_list.child?
71
- spectrum_n = spectrum_list.child
72
- loop do
73
- scan = MS::Scan.new(9)
74
- id = spectrum_n["id"].to_i
75
- id_to_scan_hash[id] = scan
76
- spec_desc_n = spectrum_n.child # required in sequence
77
- spec_settings_n = spec_desc_n.child # required in sequence
78
- if acq_n = spec_settings_n.find_first('descendant::acquisition')
79
- scan[0] = acq_n['acqNumber'].to_i
80
- else
81
- scan[0] = id
82
- end
83
- spec_inst_n = spec_settings_n.find_first('child::spectrumInstrument')
84
- scan[1] = spec_inst_n['msLevel'].to_i
85
-
86
- # we could use a scan_count, but in bioworks 33, we can't trust the
87
- # scan count! So, we just collect them
88
- scans << scan
89
-
90
- scan[3] = spec_inst_n['mzRangeStart'].to_f
91
- scan[4] = spec_inst_n['mzRangeStop'].to_f
92
- spec_inst_n.find('child::cvParam').each do |cv_param|
93
- if cv_param['name'] == 'TimeInMinutes'
94
- scan[2] = cv_param['value'].to_f * 60 #convert to seconds
95
- end
96
- end
97
- if scan[1] > 1 # precursormz info
98
- prec_list_n = spec_settings_n.next
99
- raise RuntimeError, "MSRun objects can only accept 1 precursor" if prec_list_n['count'] != '1'
100
- prec_n = prec_list_n.find_first('child::precursor')
101
- # %w(mz inten parent ms_level parent charge_states)
102
- prec = MS::Precursor.new
103
- unless bioworks33 # bioworks33 points to the wrong scan!!!
104
- prec[2] = id_to_scan_hash[prec_n['spectrumRef'].to_i]
105
- end
106
- # we're not keeping track of this guy anymore
107
- # prec[3] = prec_n['msLevel'].to_i
108
- charges = []
109
- prec_n.find('descendant::cvParam').each do |cv_param_n|
110
- case cv_param_n['name']
111
- when 'MassToChargeRatio'
112
- prec[0] = cv_param_n['value'].to_f
113
- # find the prec intensity
114
- unless bioworks33
115
- prec[1] = prec[2].spectrum.intensity_at_mz(prec[0])
116
- end
117
- when 'ChargeState'
118
- charges << cv_param_n['value'].to_i
119
- end
120
- end
121
- prec[3] = charges
122
- scan[5] = prec
123
- else # no precursors
124
- scan[5] = nil
125
- end
126
- # here's the one line way of doing it, but it's probably more clear in
127
- # the loop
128
- #while ((mz_array_bin_n = spec_desc_n.next).name != 'mzArrayBinary') do
129
- unless opts[:lazy] == :no_spectra
130
- mz_array_bin_n = nil
131
- loop do
132
- mz_array_bin_n = spec_desc_n.next
133
- break if mz_array_bin_n.name == 'mzArrayBinary'
134
- end
135
- mz_data_n = mz_array_bin_n.child
136
- inten_array_bin_n = mz_array_bin_n.next
137
- inten_data_n = inten_array_bin_n.child
138
- case opts[:lazy]
139
- when :string
140
- scan[6] = MS::Spectrum::LazyString.from_base64_pair(mz_data_n.content, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true), inten_data_n.content, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true) )
141
- when :io
142
- mz_data_n_content = mz_data_n.content
143
- i_data_n_content = inten_data_n.content
144
- scan[6] = MS::Spectrum::LazyIO.new(io, mz_data_n_content.first, mz_data_n_content.last, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true), i_data_n_content.first, i_data_n_content.last, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true))
145
- when :not
146
- mz = MS::Spectrum.base64_to_array(mz_data_n.content, mz_data_n['precision'].to_i, ((mz_data_n['endian']=='little') ? false : true))
147
- inten = MS::Spectrum.base64_to_array(inten_data_n.content, inten_data_n['precision'].to_i, ((inten_data_n['endian']=='little') ? false : true))
148
- scan[6] = MS::Spectrum.new(mz, inten)
149
- end
150
- end
151
-
152
- # set up the next loop
153
- break unless spectrum_n = spectrum_n.next
154
- end
155
- end
156
- if bioworks33
157
- MS::MSRun.add_parent_scan(scans, ((opts[:lazy] == :not) ? true : false))
158
- end
159
- msrun_obj.scans = scans
160
- msrun_obj.scan_count = scans.size
161
- unless bioworks33 # we know the scan count is off here
162
- if msrun_obj.scan_count != stated_num_scans
163
- warn "num collected scans (#{scans.size}) does not agree with stated num scans (#{stated_num_scans})!"
164
- end
165
- end
166
- msrun_obj.start_time = msrun_obj.scans.first.time
167
- msrun_obj.end_time = msrun_obj.scans.last.time
168
-
169
- io.close if filename
170
- end
171
-
172
- end
173
-
174
-
175
-
@@ -1,7 +0,0 @@
1
-
2
- class MS::Parser::MzData::LibXML < MS::Parser::MzData::DOM
3
- def get_root_node_from_file(file)
4
- XML::Document.file(file).root
5
- end
6
- end
7
-
@@ -1,31 +0,0 @@
1
- require 'ms/msrun'
2
-
3
- module MS; end
4
-
5
- module MS::Parser::MzData
6
- Base_dir_for_parsers = 'ms/parser/mzdata'
7
-
8
- # inherits XMLStyleParser and version
9
- include MS::Parser
10
- include XMLStyleParser
11
-
12
- # returns a specific parser MS::Parser::MzXML::#{ParserType}
13
- # based on choose_parser from xml_style_parser
14
- def self.new(parse_type=:msrun, version='1.05', opts={})
15
- special_subclass =
16
- if opts[:lazy] == :io
17
- 'LazyData'
18
- else ; nil
19
- end
20
-
21
- @version = version
22
- @method = parse_type
23
- #p self.methods.grep /choose_parser/
24
- XMLStyleParser.require_parse_files(Base_dir_for_parsers)
25
- parser_class = XMLStyleParser.choose_parser(self, parse_type, special_subclass)
26
- parser = parser_class.new(parse_type, version)
27
- end
28
-
29
- end
30
-
31
-
@@ -1,70 +0,0 @@
1
- require 'ms/parser/mzxml/dom'
2
-
3
- class MS::Parser::MzXML::AXML < MS::Parser::MzXML::DOM
4
- def get_root_node_from_string(string)
5
- ::AXML.parse(string)
6
- end
7
- def get_root_node_from_file(file)
8
- ::AXML.parse_file(file)
9
- end
10
- def get_root_node_from_io(io)
11
- ::AXML.parse(io)
12
- end
13
- end
14
-
15
- class MS::Parser::MzXML::AXML::LazyPeaks < MS::Parser::MzXML::AXML
16
- def get_root_node_from_string(string)
17
- ::AXML::LazyPeaks.parse(string)
18
- end
19
- def get_root_node_from_file(file)
20
- ::AXML::LazyPeaks.parse_file(file)
21
- end
22
- def get_root_node_from_io(io)
23
- ::AXML::LazyPeaks.parse(io)
24
- end
25
- end
26
-
27
- class AXML::LazyPeaks < AXML
28
- # Returns the root node (as Element) or nodes (as Array)
29
- def self.parse(stream)
30
- parser = ::AXML::XMLParser::LazyPeaks.new
31
- parser.parse(stream)
32
- parser.root
33
- end
34
- end
35
-
36
- # This parser stores information about where the peaks information is in the
37
- # file
38
- # The content of the peaks node is an array where the first member is the
39
- # start index and the last member is the number of bytes. All other members
40
- # should be ignored.
41
- class AXML::XMLParser::LazyPeaks < ::AXML::XMLParser
42
-
43
- def startElement(name, attributes)
44
- text =
45
- if name == 'peaks' ; []
46
- else ; ''
47
- end
48
- new_el = ::AXML::El.new(@cur, name, attributes, text, [])
49
- # add the new node to the previous parent node
50
- @cur.add_node(new_el)
51
- # notice the change in @cur node
52
- @cur = new_el
53
- end
54
-
55
- def character(data)
56
- if @cur.text.is_a? Array
57
- @cur.text << byteIndex
58
- else
59
- @cur.text << data
60
- end
61
- end
62
-
63
- def endElement(name)
64
- if @cur.text.is_a? Array
65
- @cur.text << (byteIndex - @cur.text.first)
66
- end
67
- @cur = @cur.parent
68
- end
69
-
70
- end
@@ -1,182 +0,0 @@
1
- require 'xml_style_parser'
2
- require 'ms/spectrum'
3
- require 'ms/scan'
4
- require 'ms/parser/mzxml'
5
- require 'tempfile'
6
-
7
-
8
- class MS::Parser::MzXML::DOM
9
- include XMLStyleParser
10
- include MS::Parser::MzXML
11
-
12
- NetworkOrder = true
13
-
14
- #@@scan_atts = %w(num msLevel retentionTime startMz endMz precursor spectrum)
15
-
16
- def initialize(parse_type=:msrun, version='1.0')
17
- @method = parse_type
18
- @version = version
19
- end
20
-
21
- def new_scan_from_hash(node)
22
- scan = MS::Scan.new # array class creates one with 9 positions
23
- scan[0] = node['num'].to_i
24
- scan[1] = node['msLevel'].to_i
25
- if x = node['retentionTime']
26
- scan[2] = x[2...-1].to_f
27
- end
28
- if x = node['startMz']
29
- scan[3] = x.to_f
30
- scan[4] = node['endMz'].to_f
31
- end
32
- scan
33
- end
34
-
35
- # assumes that node contains scans and checks any scan nodes for children
36
- def add_scan_nodes(nodes, scans, scn_index, scans_by_num, lazy, io)
37
- nodes.each do |scan_n|
38
- scan = create_scan(scan_n, scans_by_num, lazy, io)
39
- scans[scn_index] = scan
40
- scans_by_num[scan[0]] = scan
41
- scn_index += 1
42
- if @version > '1.0'
43
- new_nodes = scan_n.find('child::scan')
44
- if new_nodes.size > 0
45
- scn_index = add_scan_nodes(new_nodes, scans, scn_index, scans_by_num, lazy, io)
46
- end
47
- end
48
- end
49
- scn_index
50
- end
51
-
52
- # takes a scan node and creates a scan object
53
- # the parent scan is the one directly above it in mslevel
54
- # lazy must be a symbol from MS::MSRun.new
55
- def create_scan(scan_n, scans_by_num, lazy, io=nil)
56
- scan = new_scan_from_hash(scan_n)
57
- prec = nil
58
- scan_n.each do |node|
59
- case node.name
60
- when 'precursorMz'
61
- # should be able to do this!!!
62
- #scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
63
- raise RuntimeError, "the msrun object can only handle one precursor!" unless prec.nil?
64
- prec = MS::Precursor.new
65
- prec[1] = node['precursorIntensity'].to_f
66
- prec[0] = node.content.to_f
67
- if x = node['precursorScanNum']
68
- prec[2] = scans_by_num[x.to_i]
69
- end
70
- when 'peaks'
71
- case lazy
72
- when :no_spectra
73
- next
74
- when :string
75
- scan[6] = MS::Spectrum::LazyString.from_base64_peaks(node.content, node['precision'].to_i)
76
- when :io
77
- # assumes that parsing was done with a LazyPeaks parser!
78
- nc = node.content
79
- scan[6] = MS::Spectrum::LazyIO.new(io, nc.first, nc.last, node['precision'].to_i, MS::Parser::MzXML::DOM::NetworkOrder)
80
- when :not
81
- # SHOULD be able to do this!!
82
- #peaks_n = scan_n.find_first('child::peaks')
83
- scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
84
- end
85
- end
86
- end
87
- scan[5] = prec
88
- scan
89
- end
90
-
91
-
92
- # returns an array of msrun objects
93
- def msruns(file)
94
- raise NotImplementedError
95
- end
96
-
97
- # right now cannot parse multiple runs out of an mzXML version 2 file since
98
- # this is built around a single run per file
99
- # OPTIONS:
100
- # :msrun => (an MSRun object) # use this object instead of creating one
101
- # :lazy => [See MS::MSRun for documentation]
102
- def msrun(file, opts={})
103
- #unless opts.key?(:spectra)
104
- # opts[:spectra] = true
105
- #end
106
-
107
- msrun_obj =
108
- if x = opts[:msrun]
109
- msrun_obj = x
110
- else
111
- MS::MSRun.new
112
- end
113
-
114
- io =
115
- if file.is_a? String # a filename
116
- filename = file
117
- File.open(file)
118
- else
119
- file
120
- end
121
-
122
- root = get_root_node_from_io(io)
123
-
124
- if filename
125
- io.close # can close now
126
- end
127
-
128
- # right now we are only finding the first msRun (probably a rare case of
129
- # multiple runs in an mzXML file...)
130
- msrun_n =
131
- if @version >= '2.0'
132
- kids = root.children.select {|v| v.name == 'msRun' }
133
- raise(NotImplementedError, "one msrun per doc right now" ) if kids.size > 1
134
- kids.first
135
- else
136
- root
137
- end
138
- if msrun_n.name != 'msRun'
139
- raise RuntimeError, "extra node slipped in somehow"
140
- end
141
-
142
- ## HEADER
143
- scan_count = msrun_n['scanCount'].to_i
144
- msrun_obj.scan_count = scan_count
145
- scans_by_num = Array.new(scan_count + 1)
146
-
147
- ## SPECTRUM
148
- parent = nil
149
- scans = Array.new( scan_count )
150
- scn_index = 0
151
-
152
- # we should be able to do this, but it's not working!!!
153
- #scan_n = msrun_n.find_first('scan')
154
- #while (scn_index < scan_count)
155
- lazy = opts[:lazy]
156
-
157
- if @version >= '3.0'
158
- warn '[version 3.0 parsing may fail if > 1 peak list per scan]'
159
- # note that mzXML version 3.0 *can* have more than one peak...
160
- # I'm not sure how to deal with that since I have one spectrum/scan
161
- end
162
-
163
- scan_nodes = msrun_n.find('child::scan')
164
- add_scan_nodes(scan_nodes, scans, scn_index, scans_by_num, lazy, io)
165
-
166
- ## update the scan's parents
167
- MS::MSRun.add_parent_scan(scans)
168
-
169
- # note that startTime and endTime are optional AND in >2.2 are dateTime
170
- # instead of duration types!, so we will just use scan times...
171
- # Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
172
- # export. They give the start and end time in seconds, but they are
173
- # really minutes. All the more reason to use the first and last scans!
174
- msrun_obj.start_time = scans.first.time
175
- msrun_obj.end_time = scans.last.time
176
-
177
- msrun_obj.scans = scans
178
-
179
- end
180
- end
181
-
182
-