mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,282 +0,0 @@
1
- require 'ms/msrun'
2
- require 'fileutils'
3
-
4
- module MS; end
5
-
6
- module MS::Parser::MzXML
7
- Base_dir_for_parsers = 'ms/parser/mzxml'
8
- # inherits XMLStyleParser and version
9
- include MS::Parser
10
- include XMLStyleParser
11
-
12
- # warning: clobbers file unless a newfilename is provided!
13
- # returns the output filename
14
- # will fix any size file!
15
- def self.fix_bad_scan_tags(filename, newfilename=nil)
16
-
17
- out_io =
18
- if newfilename
19
- File.open(newfilename, 'w')
20
- else
21
- Tempfile.new(File.basename(filename))
22
- end
23
- File.open(filename) do |fh|
24
- self.fix_bad_scan_tags_from_io(fh, out_io)
25
- end
26
- out_io.close
27
- unless newfilename
28
- FileUtils.mv out_io.path, filename
29
- end
30
- end
31
-
32
- # this is a memory efficient method to fix bad scan tags
33
- # prints cleaned up file to out_io
34
- # no effort is made to rewind the io objects, the user must do this if they
35
- # plan to continue using these objects!
36
- def self.fix_bad_scan_tags_from_io(io, out_io)
37
- regexp = /<\/scan>/
38
- end_scan_line = false
39
-
40
- io.each("\n") do |line|
41
- if end_scan_line && line =~ regexp
42
- # two end scan lines! # don't print to out_io
43
- end_scan_line = true
44
- elsif line =~ regexp
45
- out_io.print(line)
46
- end_scan_line = true
47
- else
48
- out_io.print(line)
49
- end_scan_line = false
50
- end
51
- end
52
- end
53
-
54
- # returns a string with double </scan></scan> tags into single and missing
55
- # </scan> tags after peaks added in
56
- # we do this in windows style since these are generated off a windows
57
- # machine only
58
- #def self.fix_bad_scan_tags(string)
59
- # string.gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
60
- #end
61
-
62
- # returns true if it has the bad tag
63
- def self.has_bad_scan_tag_from_string?(string)
64
- if string.match(/<\/scan>\s+<\/scan>/m)
65
- true
66
- else
67
- false
68
- end
69
- end
70
-
71
- def self.has_bad_scan_tag?(filename)
72
- File.open(filename) do |fh|
73
- self.has_bad_scan_tag_from_io?(fh)
74
- end
75
- end
76
-
77
- # very efficient algorithm to check for malformed xml typical of readw
78
- # output. The extra closing scan tags come after the last ms/ms scan in a
79
- # cycle rewinds the io after looking
80
- def self.has_bad_scan_tag_from_io?(io)
81
- seen_first_ms_level = false
82
- seen_higher_ms_level = false
83
- cur_ms_level = 0
84
- found_double_end_tag = false
85
- found_end_tag = false
86
- io.each("\n") do |line|
87
- if line =~ /<\/scan>/
88
- if found_end_tag # already found one!
89
- found_double_end_tag = true
90
- break
91
- end
92
- found_end_tag = true
93
- else
94
- found_end_tag = false
95
- end
96
-
97
- if line =~ /msLevel="(\d+)"/
98
- cur_ms_level = $1.dup
99
- if seen_first_ms_level && seen_higher_ms_level && cur_ms_level == '1'
100
- break
101
- end
102
- if cur_ms_level == '1'
103
- seen_first_ms_level = true
104
- elsif cur_ms_level == '2'
105
- seen_higher_ms_level = true
106
- end
107
- end
108
- end
109
- io.rewind
110
- found_double_end_tag
111
- end
112
-
113
- # returns a specific parser MS::Parser::MzXML::#{ParserType}
114
- # based on choose_parser from xml_style_parser
115
- def self.new(parse_type=:msrun, version='1.0', opts={})
116
- special_subclass =
117
- if opts[:lazy] == :io
118
- 'LazyPeaks'
119
- else ; nil
120
- end
121
- @version = version
122
- @method = parse_type
123
- XMLStyleParser.require_parse_files(Base_dir_for_parsers)
124
- parser_class = XMLStyleParser.choose_parser(self, parse_type, special_subclass)
125
- parser = parser_class.new(parse_type, version)
126
- end
127
-
128
- # Returns an array of scans indexed by scan number
129
- # NOTE that the first scan (zero indexed) will likely be nil!
130
- # accepts an optional parse_type = 'xmlparser' | 'rexml'
131
- def scans_by_num(mzXML_file, parse_type=nil)
132
- unless parse_type
133
- parse_type = default_parser
134
- end
135
- scans = []
136
- case parse_type
137
- when 'xmlparser'
138
- parser = MS::MzXML::XMLParser::TimeMzIntenIndexer.new
139
- parser.parse(IO.read(mzXML_file))
140
- scans = parser.scans_by_num
141
- when 'rexml' # use REXML
142
- # This is really too slow for files of this size
143
- doc = REXML::Document.new File.new(mzXML_file)
144
- doc.elements.each('msRun/scan') do |scan|
145
- rt = scan.attributes['retentionTime'] ## like PT0.154000S"
146
- level = scan.attributes['msLevel']
147
- to_print = []
148
- prec_mz = nil
149
- prec_int = nil
150
- if level.to_i != 1
151
- scan.elements.each("precursorMz") do |prec|
152
- prec_mz = prec.text.to_f
153
- prec_int = prec.attributes["precursorIntensity"].to_f
154
- end
155
- end
156
- # remove the leading PT and trailing S on the retention time!
157
- rt = rt[2...-1]
158
-
159
- num = scan.attributes['num'].to_i
160
- scans[num] = MS::Scan.new(num, scan.attributes['msLevel'].to_i, rt.to_f, prec_mz, prec_int)
161
- end #doc.elements
162
- else
163
- throw ArgumentError, "invalid parse type: #{parse_type}"
164
- end
165
- ## update the scans for parents
166
- MS::Scan.add_parent_scan(scans)
167
- scans
168
- end
169
-
170
- # Returns a Hash indexed by filename (with no extension) for a given path
171
- # extension = glob (string) or regex
172
- # The basename is given as: file.split('.').first
173
- def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
174
- hash = {}
175
- Dir.chdir path do
176
- files = []
177
- if extension.class == String
178
- files = Dir[extension]
179
- elsif extension.class == Regexp
180
- files = Dir.entries(".").find_all do |dir|
181
- dir =~ extension
182
- end
183
- else
184
- puts "extension: #{extension} not a String or Regexp!"
185
- end
186
- files.each do |file|
187
- base = file.split('.').first
188
- hash[base] = precursor_mz_by_scan(file, parse_type)
189
- end
190
- end
191
- hash
192
- end
193
-
194
- # Returns hash where hash[scan_num] = [precursorMz, precursorIntensity]
195
- # Parent scans are not hashed
196
- # Keys and values are both strings
197
- def precursor_mz_and_inten_by_scan(file)
198
- # in progress
199
- end
200
-
201
- # Returns array where array[scan_num] = precursorMz
202
- # precursorMz are Floats
203
- # Array index likely starts at 1!
204
- def precursor_mz_by_scan_num(file)
205
- ## THIS SHOULD BE CREATED IN specific XML LIBS
206
- end
207
-
208
- # Returns a hash of basic info on an mzXML run:
209
- # *mzXML_elemt* *hash keys (symbols)*
210
- # scanCount scan_count
211
- # startTime start_time
212
- # endTime end_time
213
- # startMz start_mz
214
- # endMz end_mz
215
- def basic_info(mzxml_file)
216
- puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
217
- hash = {}
218
- scan_count_tmp = []
219
- (1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
220
- @fh = File.open(mzxml_file)
221
- @line = ""
222
- scan_count_tmp[0] = _el("scanCount").to_i
223
- hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
224
- hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
225
- hash[:ms_level] = _el("msLevel").to_i
226
- scan_count_tmp[1] = 1
227
- if hash[:ms_level] == 1
228
- hash[:start_mz] = _el("startMz").to_f
229
- hash[:end_mz] = _el("endMz").to_f
230
- end
231
-
232
- while !@fh.eof?
233
- @line = @fh.readline
234
- ms_level = _el("msLevel")
235
- if ms_level
236
- scan_count_tmp[ms_level.to_i] += 1
237
- else
238
- break
239
- end
240
- end
241
- scan_count = []
242
- scan_count_tmp.each do |cnt|
243
- if cnt != 0
244
- scan_count.push cnt
245
- else
246
- break
247
- end
248
- end
249
- hash[:scan_count] = scan_count
250
- @fh.close
251
- hash
252
- end
253
-
254
- # returns [start_mz, end_mz] of the first full scan (ms_level == 1)
255
- def start_and_end_mz(mzxml_file)
256
- @fh = File.open(mzxml_file)
257
- ms_level = 0
258
- @line = ""
259
- while ms_level != 1
260
- ms_level = _el("msLevel").to_i
261
- end
262
- start_mz = _el("startMz").to_f
263
- end_mz = _el("endMz").to_f
264
- @fh.close
265
- [start_mz, end_mz]
266
- end
267
-
268
- def _el(name)
269
- re = /#{name}="(.*)"/
270
- while @line !~ re && !@fh.eof?
271
- @line = @fh.readline
272
- end
273
- if $1
274
- return $1.dup
275
- else
276
- return nil
277
- end
278
- end
279
-
280
- end
281
-
282
-
data/lib/ms/parser.rb DELETED
@@ -1,108 +0,0 @@
1
- require 'xml_style_parser'
2
-
3
- module MS; end
4
-
5
- module MS::Parser
6
- # inherits attr_accessor :method, :default_parser, and parse (which should
7
- # be overridden)
8
- include XMLStyleParser
9
-
10
- Mzxml_regexp = /http:\/\/sashimi.sourceforge.net\/schema(_revision)?\/([\w\d_\.]+)/o
11
- # 'http://sashimi.sourceforge.net/schema/MsXML.xsd' # version 1
12
- # 'http://sashimi.sourceforge.net/schema_revision/mzXML_X.X' # others
13
- Mzdata_regexp = /<mzData.*version="([\d\.]+)"/m
14
-
15
- attr_accessor :version
16
-
17
- ############################################
18
- # POINTERS (to create META MAGIC)
19
- ############################################
20
-
21
- @@filetypes_to_upcase = {
22
- :mzxml => 'MzXML',
23
- :mzdata => 'MzData',
24
- :mzml => 'MzML',
25
- :raw => 'Raw',
26
- }
27
-
28
- @@filetypes_to_require = {}
29
- @@filetypes_to_constant = {}
30
-
31
- abbrevs = Dir.chdir(File.dirname(__FILE__) + "/parser") do
32
- Dir["*.rb"].map {|f| f.sub(/\.rb$/,'') }
33
- end
34
- abbrevs.each do |abbr|
35
- abb = abbr.to_sym
36
- req = ['ms', 'parser', abbr].join("/")
37
- @@filetypes_to_require[abb] = req
38
- @@filetypes_to_constant[abb] = ['MS', 'Parser', @@filetypes_to_upcase[abb]].join("::")
39
- end
40
-
41
- ############################################
42
- # END POINTERS
43
- ############################################
44
-
45
- # finds the filetype of a file (expects to be at the beginning) and rewinds
46
- # the filehandle to the beginning returns [filetype, version]. nil if
47
- # filetype and version could not be determined
48
- def self.filetype_and_version(fh_or_filename)
49
- if fh_or_filename.is_a? IO
50
- fh = fh_or_filename
51
- found = nil
52
- # Test for RAW file:
53
- header = fh.read(18).unpack('@2axaxaxaxaxaxaxa').join
54
- if header == 'Finnigan'
55
- return [:raw, nil]
56
- end
57
- fh.rewind
58
- while (line = fh.gets)
59
- found =
60
- case line
61
- when Mzxml_regexp
62
- mtch = $2.dup
63
- case mtch
64
- when /mzXML_([\d\.]+)/
65
- [:mzxml, $1.dup]
66
- when /MsXML/
67
- [:mzxml, '1.0']
68
- else
69
- abort "Cannot determine mzXML version!"
70
- end
71
- when Mzdata_regexp
72
- [:mzdata, $1.dup]
73
- end
74
- if found
75
- break
76
- end
77
- end
78
- fh.rewind
79
- found
80
- else
81
- File.open(fh_or_filename) do |fh|
82
- filetype_and_version(fh)
83
- end
84
- end
85
- end
86
-
87
- # filetype_version is an example file to parse, or it is an array: [type, version].
88
- # parse_type is the information to be gleaned (as symbol).
89
- def self.new(filetype_version, parse_type, opts={})
90
- unless filetype_version.is_a? Array
91
- filetype_version = filetype_and_version(filetype_version)
92
- end
93
- require_and_create_parser(filetype_version, parse_type, opts)
94
- end
95
-
96
- private
97
-
98
- # returns a working parser.
99
- def self.require_and_create_parser(filetype_version, parse_type, opts)
100
- (filetype, version) = filetype_version
101
- #puts "FT: #{filetype} VERSION: #{version}"
102
- reply = require @@filetypes_to_require[filetype]
103
- @@filetypes_to_require[filetype]
104
- parser_class = MS::Parser.const_get(@@filetypes_to_upcase[filetype])
105
- parser_class.new(parse_type, version, opts)
106
- end
107
-
108
- end
data/lib/ms/precursor.rb DELETED
@@ -1,25 +0,0 @@
1
- require 'arrayclass'
2
-
3
- module MS; end
4
-
5
- # charge_states are the possible charge states of the precursor
6
- # parent references a scan
7
- # 0 1 2 3
8
- MS::Precursor = Arrayclass.new(%w(mz intensity parent charge_states))
9
-
10
- class MS::Precursor
11
-
12
- undef :intensity
13
-
14
- def intensity
15
- if self[1].nil?
16
- if s = self[2].spectrum
17
- self[1] = s.intensity_at_mz(self[0])
18
- else
19
- nil # if we didn't read in the spectra, we can't get this value!
20
- end
21
- end
22
- self[1]
23
- end
24
-
25
- end
data/lib/ms/scan.rb DELETED
@@ -1,81 +0,0 @@
1
- require 'arrayclass'
2
- require 'ms/precursor'
3
-
4
- module MS ; end
5
-
6
- # 0 1 2 3 4 5 6
7
- MS::Scan = Arrayclass.new( %w(num ms_level time start_mz end_mz precursor spectrum) )
8
-
9
- # time in seconds
10
- # everything else in float/int
11
-
12
- class MS::Scan
13
- #@@order = %w(num ms_level time start_mz end_mz prec_mz prec_inten parent spectrum)
14
- #attr_accessor :num, :ms_level, :time, :start_mz, :end_mz, :prec_mz, :prec_inten, :parent, :spectrum
15
-
16
- #def initialize(ar=nil)
17
- # @@order.zip(ar) do |x,v|
18
- # send((x+'=').to_sym, v)
19
- # end
20
- #end
21
-
22
- def to_s
23
- "<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
24
- end
25
-
26
- undef_method :inspect
27
- def inspect
28
- atts = %w(num ms_level time start_mz end_mz)
29
- display = atts.map do |att|
30
- if val = send(att.to_sym)
31
- "#{att}=#{val}"
32
- else
33
- nil
34
- end
35
- end
36
- display.compact!
37
- spec_display =
38
- if spectrum
39
- spectrum.mzs.size
40
- else
41
- 'nil'
42
- end
43
- "<MS::Scan:#{__id__} " + display.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
44
- end
45
-
46
- # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
47
- def to_index_file_string
48
- arr = [ms_level, num, time]
49
- if precursor then arr << precursor.mz end
50
- if x = precursor.intensity then arr << x end
51
- arr.join(" ")
52
- end
53
-
54
- # adds the attribute parent to each scan with a parent
55
- # (level 1 = no parent; level 2 = prev level 1, etc.
56
- def self.add_parent_scan(scans)
57
- prev_scan = nil
58
- parent_stack = [nil]
59
- ## we want to set the level to be the first mslevel we come to
60
- prev_level = 1
61
- scans.each do |scan|
62
- if scan then prev_level = scan.ms_level; break; end
63
- end
64
- scans.each do |scan|
65
- next unless scan ## the first one is nil, (others?)
66
- level = scan.ms_level
67
- if prev_level < level
68
- parent_stack.unshift prev_scan
69
- end
70
- if prev_level > level
71
- (prev_level - level).times do parent_stack.shift end
72
- end
73
- scan.parent = parent_stack.first
74
- prev_level = level
75
- prev_scan = scan
76
- end
77
- end
78
-
79
- end
80
-
81
-
data/lib/mspire.rb DELETED
@@ -1,4 +0,0 @@
1
-
2
- module Mspire
3
- Version = '0.4.9'
4
- end