mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,282 +0,0 @@
1
- require 'ms/msrun'
2
- require 'fileutils'
3
-
4
- module MS; end
5
-
6
- module MS::Parser::MzXML
7
- Base_dir_for_parsers = 'ms/parser/mzxml'
8
- # inherits XMLStyleParser and version
9
- include MS::Parser
10
- include XMLStyleParser
11
-
12
# Removes duplicated </scan> closing lines from an mzXML file.
#
# filename    - path of the mzXML file to clean.
# newfilename - optional path for the cleaned copy. When it is omitted the
#               cleaned text goes to a temporary file which is then moved
#               over +filename+ (warning: the original file is clobbered!).
#
# The input is streamed line by line, so a file of any size can be fixed.
#
# Returns the name of the file that now holds the cleaned output
# (+newfilename+ when given, otherwise +filename+).
def self.fix_bad_scan_tags(filename, newfilename=nil)
  out_io =
    if newfilename
      File.open(newfilename, 'w')
    else
      Tempfile.new(File.basename(filename))
    end
  begin
    File.open(filename) do |fh|
      fix_bad_scan_tags_from_io(fh, out_io)
    end
  ensure
    # close the output even if reading or filtering raised, so the handle
    # is never leaked
    out_io.close
  end
  return newfilename if newfilename
  # no explicit destination: replace the original with the cleaned copy
  FileUtils.mv out_io.path, filename
  filename
end
31
-
32
# Streams +io+ to +out_io+ one line at a time, dropping every line that
# contains </scan> when the line directly before it also contained </scan>
# (i.e. a run of closing scan lines collapses to its first line).
#
# Only one line is held in memory at a time.
# Neither io object is rewound; callers that keep using them must do that
# themselves.
def self.fix_bad_scan_tags_from_io(io, out_io)
  previous_closed_scan = false
  io.each("\n") do |line|
    closes_scan = !(line =~ /<\/scan>/).nil?
    # a closing line right after another closing line is the duplicate
    out_io.print(line) unless closes_scan && previous_closed_scan
    previous_closed_scan = closes_scan
  end
end
53
-
54
- # returns a string with double </scan></scan> tags into single and missing
55
- # </scan> tags after peaks added in
56
- # we do this in windows style since these are generated off a windows
57
- # machine only
58
- #def self.fix_bad_scan_tags(string)
59
- # string.gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
60
- #end
61
-
62
# Returns true when +string+ contains two </scan> closing tags separated
# only by whitespace (at least one whitespace character), false otherwise.
def self.has_bad_scan_tag_from_string?(string)
  !string.match(/<\/scan>\s+<\/scan>/m).nil?
end
70
-
71
# Opens +filename+ and reports whether it contains the doubled closing scan
# tag, delegating the check to has_bad_scan_tag_from_io?.
# The file is closed when the block finishes.
def self.has_bad_scan_tag?(filename)
  File.open(filename) { |handle| has_bad_scan_tag_from_io?(handle) }
end
76
-
77
# Cheap check for the malformed xml typical of readw output: two consecutive
# lines that both contain </scan>.
#
# Scanning stops early, without reporting a problem, as soon as an
# msLevel="1" line is met after both an msLevel="1" and an msLevel="2" line
# have already been seen (i.e. once one full cycle has gone by).
#
# The io is rewound before returning.
# Returns true if a doubled closing tag was found, false otherwise.
def self.has_bad_scan_tag_from_io?(io)
  saw_ms1 = false
  saw_ms2 = false
  previous_line_closed_scan = false
  doubled = false
  io.each("\n") do |line|
    closes_scan = !(line =~ /<\/scan>/).nil?
    if closes_scan && previous_line_closed_scan
      doubled = true
      break
    end
    previous_line_closed_scan = closes_scan

    next unless line =~ /msLevel="(\d+)"/
    level = $1.dup
    # second MS1 scan after a complete cycle: nothing more to learn
    break if saw_ms1 && saw_ms2 && level == '1'
    case level
    when '1' then saw_ms1 = true
    when '2' then saw_ms2 = true
    end
  end
  io.rewind
  doubled
end
112
-
113
# Builds a concrete parser (MS::Parser::MzXML::#{ParserType}) chosen through
# XMLStyleParser.choose_parser.
#
# parse_type - what to parse for (defaults to :msrun)
# version    - mzXML version string (defaults to '1.0')
# opts       - only opts[:lazy] is consulted here: the value :io selects the
#              'LazyPeaks' special subclass. Note that opts is not passed on
#              to the parser's constructor.
#
# Also records version and parse_type in @version / @method on this module.
# Returns the new parser instance.
def self.new(parse_type=:msrun, version='1.0', opts={})
  special_subclass = (opts[:lazy] == :io) ? 'LazyPeaks' : nil
  @version = version
  @method = parse_type
  XMLStyleParser.require_parse_files(Base_dir_for_parsers)
  XMLStyleParser.choose_parser(self, parse_type, special_subclass).new(parse_type, version)
end
127
-
128
# Returns an array of scans indexed by scan number.
# NOTE that the first scan (zero indexed) will likely be nil!
#
# mzXML_file - path to the mzXML file
# parse_type - optional 'xmlparser' | 'rexml'; when nil, default_parser is
#              used
#
# Raises ArgumentError for any other parse_type.
def scans_by_num(mzXML_file, parse_type=nil)
  parse_type ||= default_parser
  scans = []
  case parse_type
  when 'xmlparser'
    parser = MS::MzXML::XMLParser::TimeMzIntenIndexer.new
    parser.parse(IO.read(mzXML_file))
    scans = parser.scans_by_num
  when 'rexml' # use REXML
    # This is really too slow for files of this size
    # block form so the file handle is closed once the document is built
    doc = File.open(mzXML_file) { |fh| REXML::Document.new(fh) }
    doc.elements.each('msRun/scan') do |scan|
      level = scan.attributes['msLevel'].to_i
      prec_mz = nil
      prec_int = nil
      if level != 1
        scan.elements.each("precursorMz") do |prec|
          prec_mz = prec.text.to_f
          prec_int = prec.attributes["precursorIntensity"].to_f
        end
      end
      # retentionTime looks like "PT0.154000S": strip the leading PT and the
      # trailing S
      rt = scan.attributes['retentionTime'][2...-1]

      num = scan.attributes['num'].to_i
      # NOTE(review): five positional values are handed to MS::Scan.new --
      # confirm they line up with the field order MS::Scan expects
      scans[num] = MS::Scan.new(num, level, rt.to_f, prec_mz, prec_int)
    end
  else
    # raise (not throw): throw is for catch/throw control flow and would
    # surface as an "uncaught throw" error instead of this message
    raise ArgumentError, "invalid parse type: #{parse_type}"
  end
  ## update the scans for parents
  MS::Scan.add_parent_scan(scans)
  scans
end
169
-
170
# Collects precursor_mz_by_scan results for every matching file in +path+.
#
# path       - directory to look in (the working directory is switched to it
#              for the duration of the call)
# extension  - a glob String (matched with Dir[]) or a Regexp (matched against
#              each directory entry); anything else prints a message and
#              matches no files
# parse_type - passed straight through to precursor_mz_by_scan
#
# Returns a Hash keyed by each file's name up to its first '.'
# (file.split('.').first).
def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
  by_basename = {}
  Dir.chdir(path) do
    matches =
      if extension.instance_of?(String)
        Dir[extension]
      elsif extension.instance_of?(Regexp)
        Dir.entries(".").select { |entry| entry =~ extension }
      else
        puts "extension: #{extension} not a String or Regexp!"
        []
      end
    matches.each do |name|
      by_basename[name.split('.').first] = precursor_mz_by_scan(name, parse_type)
    end
  end
  by_basename
end
193
-
194
# Not implemented yet (work in progress): currently ignores +file+ and
# returns nil.
#
# Intended contract: a hash where hash[scan_num] = [precursorMz,
# precursorIntensity], with parent scans left out and both keys and values
# as strings.
def precursor_mz_and_inten_by_scan(file)
  nil # in progress
end
200
-
201
# Placeholder: does nothing here and returns nil. The real implementation is
# meant to be supplied by the specific XML libraries.
#
# Intended contract: an array where array[scan_num] = precursorMz (Floats);
# the array index likely starts at 1.
def precursor_mz_by_scan_num(file)
  nil ## THIS SHOULD BE CREATED IN specific XML LIBS
end
207
-
208
# Returns a hash of basic info on an mzXML run:
#     *mzXML_elemt*   *hash keys (symbols)*
#     scanCount       scan_count
#     startTime       start_time
#     endTime         end_time
#     startMz         start_mz
#     endMz           end_mz
#
# :ms_level (the msLevel of the first scan) is stored as well; :start_mz and
# :end_mz are only filled in when that first scan is level 1.
#
# :scan_count is an array: element 0 is the file's scanCount attribute and
# element n the number of scans counted with msLevel n; the array stops
# before the first zero entry.
#
# The attributes are located with _el, which reads forward through the file
# via @fh and @line. The file is always closed, even when parsing raises.
def basic_info(mzxml_file)
  puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
  hash = {}
  # slot 0: declared scanCount; slots 1..5: per-level counters
  scan_count_tmp = Array.new(6, 0)
  @fh = File.open(mzxml_file)
  begin
    @line = ""
    scan_count_tmp[0] = _el("scanCount").to_i
    hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
    hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
    hash[:ms_level] = _el("msLevel").to_i
    # the first scan has just been consumed above
    scan_count_tmp[1] = 1
    if hash[:ms_level] == 1
      hash[:start_mz] = _el("startMz").to_f
      hash[:end_mz] = _el("endMz").to_f
    end

    until @fh.eof?
      # step past the line that matched last time so it is not counted twice
      @line = @fh.readline
      ms_level = _el("msLevel")
      break unless ms_level
      level = ms_level.to_i
      # to_i turns a missing slot (level > 5) into 0 instead of raising
      scan_count_tmp[level] = scan_count_tmp[level].to_i + 1
    end
  ensure
    @fh.close
  end
  hash[:scan_count] = scan_count_tmp.take_while { |cnt| cnt && cnt != 0 }
  hash
end
253
-
254
# Returns [start_mz, end_mz] of the first full scan (ms_level == 1), or nil
# when the file contains no scan with msLevel 1.
#
# Attributes are located with _el, which reads forward through the file via
# @fh and @line. The file is closed before returning.
def start_and_end_mz(mzxml_file)
  File.open(mzxml_file) do |fh|
    @fh = fh
    @line = ""
    loop do
      level = _el("msLevel")
      # ran off the end of the file without meeting an MS1 scan
      return nil unless level
      break if level.to_i == 1
      # _el keeps returning the current line while it matches; blank it so
      # the next call moves on to the following msLevel instead of looping
      # forever on this one
      @line = ""
    end
    start_mz = _el("startMz").to_f
    end_mz = _el("endMz").to_f
    [start_mz, end_mz]
  end
end
267
-
268
# Finds the next occurrence of the attribute +name+ ( name="value" ),
# starting with the current @line and reading further lines from @fh until
# one matches or the end of the file is reached.
#
# @line is left on the matching line, so a following call for another
# attribute on the same line still sees it.
#
# Returns a copy of the attribute value, or nil when no line matched.
def _el(name)
  # [^"]* stops at the closing quote, so other attributes later on the same
  # line are not swallowed into the value; the name is escaped so it is
  # always matched literally
  re = /#{Regexp.escape(name)}="([^"]*)"/
  while @line !~ re && !@fh.eof?
    @line = @fh.readline
  end
  found = re.match(@line)
  found ? found[1].dup : nil
end
279
-
280
- end
281
-
282
-
data/lib/ms/parser.rb DELETED
@@ -1,108 +0,0 @@
1
- require 'xml_style_parser'
2
-
3
- module MS; end
4
-
5
- module MS::Parser
6
- # inherits attr_accessor :method, :default_parser, and parse (which should
7
- # be overridden)
8
- include XMLStyleParser
9
-
10
- Mzxml_regexp = /http:\/\/sashimi.sourceforge.net\/schema(_revision)?\/([\w\d_\.]+)/o
11
- # 'http://sashimi.sourceforge.net/schema/MsXML.xsd' # version 1
12
- # 'http://sashimi.sourceforge.net/schema_revision/mzXML_X.X' # others
13
- Mzdata_regexp = /<mzData.*version="([\d\.]+)"/m
14
-
15
- attr_accessor :version
16
-
17
- ############################################
18
- # POINTERS (to create META MAGIC)
19
- ############################################
20
-
21
# file type symbol => name of the constant under MS::Parser
@@filetypes_to_upcase = {
  :mzxml => 'MzXML',
  :mzdata => 'MzData',
  :mzml => 'MzML',
  :raw => 'Raw',
}

# file type symbol => path handed to require
@@filetypes_to_require = {}
# file type symbol => fully qualified constant name (as a String)
@@filetypes_to_constant = {}

# one entry per *.rb file found in the parser/ directory next to this file
parser_scripts = Dir.chdir(File.dirname(__FILE__) + "/parser") { Dir["*.rb"] }
parser_scripts.each do |script|
  stem = script.sub(/\.rb$/,'')
  key = stem.to_sym
  @@filetypes_to_require[key] = "ms/parser/#{stem}"
  @@filetypes_to_constant[key] = "MS::Parser::#{@@filetypes_to_upcase[key]}"
end
40
-
41
- ############################################
42
- # END POINTERS
43
- ############################################
44
-
45
# Determines the file type and version of a mass spec data file.
#
# fh_or_filename - an open stream positioned at its beginning (anything that
#                  responds to rewind, e.g. a File or StringIO), or else a
#                  file name, which is opened and closed here.
#
# Returns [filetype, version]: [:raw, nil], [:mzxml, version] or
# [:mzdata, version]. Returns nil if the type could not be determined.
# A stream argument is always rewound to the beginning before returning.
#
# NOTE: an mzXML schema line whose version is not recognized aborts the
# whole process (Kernel#abort).
def self.filetype_and_version(fh_or_filename)
  unless fh_or_filename.respond_to?(:rewind)
    return File.open(fh_or_filename) { |fh| filetype_and_version(fh) }
  end
  fh = fh_or_filename

  # Test for RAW file: skipping the first two bytes, every other byte of the
  # header spells 'Finnigan'. The template consumes 17 bytes, so shorter (or
  # empty) input cannot be a RAW file and must not reach unpack.
  header = fh.read(18)
  fh.rewind
  if header && header.bytesize >= 17 &&
      header.unpack('@2axaxaxaxaxaxaxa').join == 'Finnigan'
    return [:raw, nil]
  end

  found = nil
  while (line = fh.gets)
    found =
      case line
      when Mzxml_regexp
        schema = $2.dup
        case schema
        when /mzXML_([\d\.]+)/
          [:mzxml, $1.dup]
        when /MsXML/
          [:mzxml, '1.0']
        else
          abort "Cannot determine mzXML version!"
        end
      when Mzdata_regexp
        [:mzdata, $1.dup]
      end
    break if found
  end
  fh.rewind
  found
end
86
-
87
# Creates a parser for a given kind of file.
#
# filetype_version - either an array [type, version], or an example file
#                    (anything filetype_and_version accepts) from which the
#                    type and version are detected.
# parse_type       - the information to be gleaned (as symbol).
# opts             - passed through to the parser.
def self.new(filetype_version, parse_type, opts={})
  resolved =
    if filetype_version.is_a?(Array)
      filetype_version
    else
      filetype_and_version(filetype_version)
    end
  require_and_create_parser(resolved, parse_type, opts)
end
95
-
96
- private
97
-
98
# Requires the parser file registered for the file type and returns a
# working parser instance.
#
# filetype_version - array of [filetype, version]
# parse_type, opts - handed to the parser class's constructor together with
#                    the version
#
# NOTE(review): a bare `private` does not apply to methods defined with
# `def self.`, so this method is still publicly callable.
def self.require_and_create_parser(filetype_version, parse_type, opts)
  filetype, version = filetype_version
  require @@filetypes_to_require[filetype]
  MS::Parser.const_get(@@filetypes_to_upcase[filetype]).new(parse_type, version, opts)
end
107
-
108
- end
data/lib/ms/precursor.rb DELETED
@@ -1,25 +0,0 @@
1
- require 'arrayclass'
2
-
3
- module MS; end
4
-
5
- # charge_states are the possible charge states of the precursor
6
- # parent references a scan
7
- # 0 1 2 3
8
- MS::Precursor = Arrayclass.new(%w(mz intensity parent charge_states))
9
-
10
class MS::Precursor

  # replace the generated reader with the lazy version below
  undef :intensity

  # Returns the precursor intensity (slot 1).
  #
  # When no intensity has been stored yet it is looked up in the parent
  # scan's spectrum (slot 2) at this precursor's m/z (slot 0) and cached.
  # Returns nil when there is no parent scan or the parent has no spectrum
  # (if we didn't read in the spectra, we can't get this value!).
  def intensity
    if self[1].nil?
      parent_scan = self[2]
      # guard against a precursor whose parent was never assigned
      spec = parent_scan && parent_scan.spectrum
      self[1] = spec.intensity_at_mz(self[0]) if spec
    end
    self[1]
  end

end
data/lib/ms/scan.rb DELETED
@@ -1,81 +0,0 @@
1
- require 'arrayclass'
2
- require 'ms/precursor'
3
-
4
- module MS ; end
5
-
6
- # 0 1 2 3 4 5 6
7
- MS::Scan = Arrayclass.new( %w(num ms_level time start_mz end_mz precursor spectrum) )
8
-
9
- # time in seconds
10
- # everything else in float/int
11
-
12
- class MS::Scan
13
- #@@order = %w(num ms_level time start_mz end_mz prec_mz prec_inten parent spectrum)
14
- #attr_accessor :num, :ms_level, :time, :start_mz, :end_mz, :prec_mz, :prec_inten, :parent, :spectrum
15
-
16
- #def initialize(ar=nil)
17
- # @@order.zip(ar) do |x,v|
18
- # send((x+'=').to_sym, v)
19
- # end
20
- #end
21
-
22
# Short one-line description: scan number, ms level and time.
def to_s
  format('<Scan num=%s ms_level=%s time=%s>', num, ms_level, time)
end
25
-
26
undef_method :inspect
# Compact description of the scan: every non-nil value among num, ms_level,
# time, start_mz and end_mz, then the precursor's inspect output, then the
# number of m/z values in the spectrum (or 'nil' when there is no spectrum).
def inspect
  shown = %w(num ms_level time start_mz end_mz).map do |att|
    val = send(att.to_sym)
    val ? "#{att}=#{val}" : nil
  end.compact
  spec_display = spectrum ? spectrum.mzs.size : 'nil'
  "<MS::Scan:#{__id__} " + shown.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
end
45
-
46
# returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
#
# The precursor m/z is appended only when the scan has a precursor, and the
# precursor intensity only when it is available. A scan without a precursor
# gives just "ms_level num time".
def to_index_file_string
  arr = [ms_level, num, time]
  prec = precursor
  if prec
    arr << prec.mz
    # intensity may be unknown (nil); leave the column out in that case
    inten = prec.intensity
    arr << inten if inten
  end
  arr.join(" ")
end
53
-
54
# Assigns a parent to every scan in +scans+ via scan.parent=
# (level 1 = no parent; level 2 = previous level 1 scan, etc.).
#
# scans - array of scans in acquisition order; nil entries are skipped.
#
# The starting level is taken from the first non-nil scan. Whenever the ms
# level rises, the previous scan becomes the current parent; whenever it
# falls by k levels, the k most recent parents are dropped.
#
# NOTE(review): relies on each scan responding to parent= -- confirm the
# scan class actually provides that writer.
def self.add_parent_scan(scans)
  first_real = scans.find { |candidate| candidate }
  ## we want to set the level to be the first mslevel we come to
  last_level = first_real ? first_real.ms_level : 1
  ancestors = [nil]
  last_scan = nil
  scans.each do |scan|
    next unless scan ## the first one is nil, (others?)
    depth = scan.ms_level
    if last_level < depth
      ancestors.unshift(last_scan)
    elsif last_level > depth
      (last_level - depth).times { ancestors.shift }
    end
    scan.parent = ancestors.first
    last_level = depth
    last_scan = scan
  end
end
78
-
79
- end
80
-
81
-
data/lib/mspire.rb DELETED
@@ -1,4 +0,0 @@
1
-
2
# Top-level namespace carrying the gem's release identifier.
module Mspire
  # Release string of this version of the gem.
  Version = '0.4.9'
end