mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/lib/spec/mzxml/parser.rb
DELETED
|
@@ -1,449 +0,0 @@
|
|
|
1
|
-
require 'spec/msrun'

# xmlparser is optional. When it cannot be loaded a notice is printed and the
# REXML / regex code paths remain available.
begin
  require 'xmlparser'
rescue LoadError
  # Diagnostics go to stderr so that merely loading this library does not
  # pollute the stdout of scripts that emit data there.
  warn "*******************************************************************"
  warn "WARNING: 'xmlparser' library not installed:"
  warn "Defaulting to REXML (slower, but guaranteed to parse correct xml)"
  warn "Use :parse_type => 'regex' for faster (but not guaranteed) parsing"
  warn "Or install 'xmlparser'!"
  warn "*******************************************************************"
end

# Superclass used by the xmlparser-based handler classes in this file.
# Falls back to Object so those class definitions still load when the
# XMLParser constant is unavailable.
$XMLParserClass = defined?(::XMLParser) ? ::XMLParser : Object

require 'spec/mzxml'

require 'rexml/document'
require 'rexml/streamlistener'

module Spec::MzXML::REXMLStreamListener; end
module Spec::MzXML::PrecMzByNum; end
|
|
26
|
-
|
|
27
|
-
# for REXML
#
# Stream listener that remembers, per scan number, the text seen inside a
# <precursorMz> element. After parsing, prec_mz[scan_num] holds that text;
# indices for which nothing was recorded stay nil.
class Spec::MzXML::REXMLStreamListener::PrecMzByNum
  include REXML::StreamListener

  attr_accessor :prec_mz

  def initialize
    @get_data = false
    @scan_num = nil
    @prec_mz = []
  end

  # "scan" updates the current scan number; "precursorMz" arms text capture.
  def tag_start(name, attrs)
    case name
    when "scan"
      @scan_num = attrs["num"].to_i
    when "precursorMz"
      @get_data = true
    end
  end

  # Leaving <precursorMz> disarms text capture.
  def tag_end(name)
    @get_data = false if name == "precursorMz"
  end

  # Stores the text under the current scan number while capture is armed.
  def text(txt)
    @prec_mz[@scan_num] = txt if @get_data
  end

end
|
|
60
|
-
|
|
61
|
-
module Spec::MzXML::XMLParser; end

# Event handler (startElement / endElement / character callbacks) that
# collects the text of each scan's <precursorMz> element.
# After parsing, prec_mz[scan_num] is that text as a String; indices for
# which no <precursorMz> was seen stay nil.
class Spec::MzXML::XMLParser::PrecMzByNum < $XMLParserClass

  attr_accessor :prec_mz

  def initialize
    @prec_mz = []
    # Parse state is per instance. It used to live in class variables, so a
    # parse that stopped inside <precursorMz> left the capture flag set (and a
    # stale scan number) for every parser created afterwards.
    @scan_num = nil
    @get_data = false
  end

  # "scan" updates the current scan number; "precursorMz" starts a fresh,
  # empty string for that scan and arms text capture.
  def startElement(name, attrs)
    if name == "scan"
      @scan_num = attrs["num"].to_i
    elsif name == "precursorMz"
      @prec_mz[@scan_num] = ""
      @get_data = true
    end
  end

  # Leaving <precursorMz> disarms text capture.
  def endElement(name)
    @get_data = false if name == "precursorMz"
  end

  # Text is appended (not assigned), so a value delivered over several
  # callbacks is concatenated.
  def character(data)
    @prec_mz[@scan_num] << data if @get_data
  end

end
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# Returns parallel arrays (times, spectra) where each spectra is whatever
# base64_peaks_to_array produces for that scan's <peaks> text
# (alternating mz and intensity), and times are strings with the time in
# seconds. Only scans whose msLevel equals the ms_level given to the
# constructor (default 1) are collected.
class Spec::MzXML::XMLParser::TimesAndSpectra < $XMLParserClass
  include Spec::MzXML
  @@precision = 32 # @TODO: set dynamic

  attr_accessor :times, :spectra

  # [times, spectra] as collected so far.
  def times_and_spectra
    [@times, @spectra]
  end

  def initialize(ms_level=1)
    # kept as a String because it is compared against the raw attribute value
    @ms_level = "#{ms_level}"
    @times = []
    @spectra = []
    # Parse state is per instance. It used to live in class variables, which
    # leaked an interrupted parse into every parser created afterwards.
    @get_peaks = false
    @get_data = false
    @data = ""
  end

  # A matching <scan> records its retention time and arms peak collection;
  # the following <peaks> starts a fresh text buffer.
  def startElement(name, attrs)
    if name == "scan" && attrs["msLevel"] == @ms_level
      @times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
      @get_peaks = true
    elsif name == "peaks" && @get_peaks
      @get_data = true
      @data = ""
    end
  end

  # Appends text to the buffer while inside a wanted <peaks> element.
  def character(data)
    @data << data if @get_data
  end

  # At the end of a wanted <peaks> element the buffered text is decoded and
  # stored, and collection is disarmed until the next matching scan.
  def endElement(name)
    if name == "peaks" && @get_peaks
      @spectra << base64_peaks_to_array(@data, @@precision)
      @get_data = false
      @get_peaks = false
    end
  end

end
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
# Regular-expression based extraction of per-scan values from an mzXML file.
# The whole file is read into memory and matched; no XML parsing is done.
class Spec::MzXML::Regexp
  # captures: [scan number, everything up to the next </scan>]
  @@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo

  # Returns array where array[scan_num] = [precursorMz, precursorIntensity]
  # (both strings) for scans matched by the msLevel="2" pattern.
  def self.precursor_mz_and_intensity_by_scan(file)
    pattern = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
    by_scan_num(file, pattern) { |md| md.captures.reverse }
  end

  # Applies regex to the body of every scan and stores the block's result at
  # the scan's number; scans that do not match are left as nil.
  # (array will likely start at 1!)
  def self.by_scan_num(file, regex)
    results = []
    contents = File.open(file) { |fh| fh.read }
    contents.scan(@@scan_re).each do |scan_num, scan_body|
      found = regex.match(scan_body)
      next unless found
      results[scan_num.to_i] = yield(found)
    end
    results
  end

  # Returns array where array[scan_num] = precursorMz
  # Parent scans are not arrayed
  # Values are strings. Array index likely starts at 1!
  # @TODO: replace the use of a yield block
  def self.precursor_mz_by_scan(file)
    pattern = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
    by_scan_num(file, pattern) { |md| md.captures[0] }
  end

end
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
class Spec::MzXML::Parser
|
|
179
|
-
|
|
180
|
-
# Picks the parse type to use when none is requested: "xmlparser" if any
# entry of the loaded-features list ($") mentions xmlparser, else "rexml".
def default_parser
  $".any? { |lib| lib =~ /xmlparser/ } ? "xmlparser" : "rexml"
end
|
|
193
|
-
|
|
194
|
-
# With a file, immediately dispatches to the method named by parse_type,
# passing the file and any extra args; without one, does nothing.
def initialize(file=nil, parse_type=:parse, *args)
  return unless file
  send(parse_type, file, *args)
end
|
|
199
|
-
|
|
200
|
-
# Parse into a complete object structure (REXML??)
#
# Not written yet. Raises NotImplementedError instead of printing a message
# and calling exit, which terminated the whole host process with a success
# status. This is also the default parse_type of the constructor.
def parse(file)
  # @TODO: write complete parser
  raise NotImplementedError, "#{self.class}#parse is not implemented yet (file: #{file})"
end
|
|
206
|
-
|
|
207
|
-
# Reads the whole file, feeds it to a TimesAndSpectra handler and hands back
# that handler's result:
# [times_arr, [m/z,inten,m/z,inten...]]
# where times are time strings (in seconds)
def times_and_spectra(file)
  handler = Spec::MzXML::XMLParser::TimesAndSpectra.new
  contents = IO.read(file)
  handler.parse(contents)
  handler.times_and_spectra
end
|
|
214
|
-
|
|
215
|
-
# Returns an array of scans indexed by scan number
# NOTE that the first scan (zero indexed) will likely be nil!
# accepts an optional parse_type = 'xmlparser' | 'rexml'
# (defaults to whatever default_parser selects).
# Raises ArgumentError for any other parse_type.
def scans_by_num(mzXML_file, parse_type=nil)
  parse_type ||= default_parser
  scans = []
  case parse_type
  when 'xmlparser'
    parser = Spec::MzXML::XMLParser::TimeMzIntenIndexer.new
    parser.parse(IO.read(mzXML_file))
    scans = parser.scans_by_num
  when 'rexml' # use REXML
    # This is really too slow for files of this size
    # Block form so the file handle is closed once the document is built.
    doc = File.open(mzXML_file) { |io| REXML::Document.new(io) }
    doc.elements.each('msRun/scan') do |scan|
      rt = scan.attributes['retentionTime'] ## like PT0.154000S"
      level = scan.attributes['msLevel'].to_i
      prec_mz = nil
      prec_int = nil
      # precursor values are only looked up for non-MS1 scans
      if level != 1
        scan.elements.each("precursorMz") do |prec|
          prec_mz = prec.text.to_f
          prec_int = prec.attributes["precursorIntensity"].to_f
        end
      end
      # remove the leading PT and trailing S on the retention time!
      rt = rt[2...-1]

      num = scan.attributes['num'].to_i
      scans[num] = Spec::Scan.new(num, level, rt.to_f, prec_mz, prec_int)
    end #doc.elements
  else
    # raise, not throw: throw is for catch/throw control flow and would
    # surface as an "uncaught throw" error instead of this message.
    raise ArgumentError, "invalid parse type: #{parse_type}"
  end
  ## update the scans for parents
  Spec::Scan.add_parent_scan(scans)
  scans
end
|
|
256
|
-
|
|
257
|
-
# Runs precursor_mz_by_scan on every matching file inside path and returns
# the results in a Hash keyed by each file's basename.
# extension = glob (string) or regex; anything else prints a notice and
# yields an empty Hash.
# The basename is given as: file.split('.').first
def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
  by_basename = {}
  Dir.chdir(path) do
    ext_class = extension.class
    matched =
      if ext_class == String
        Dir[extension]
      elsif ext_class == Regexp
        Dir.entries(".").select { |entry| entry =~ extension }
      else
        puts "extension: #{extension} not a String or Regexp!"
        []
      end
    matched.each do |fname|
      by_basename[fname.split('.').first] = precursor_mz_by_scan(fname, parse_type)
    end
  end
  by_basename
end
|
|
280
|
-
|
|
281
|
-
# Placeholder ("in progress"). Intended to return a hash where
# hash[scan_num] = [precursorMz, precursorIntensity] (parent scans not
# hashed, keys and values both strings), but no body has been written:
# every call currently evaluates to nil.
def precursor_mz_and_inten_by_scan(file)
  nil
end
|
|
287
|
-
|
|
288
|
-
# Loads file as a Spec::MSRunIndex and returns an array, parallel to its
# scans_by_num, holding each scan's prec_mz (nil where there is no scan).
def get_prec_mz_by_scan_for_time_index(file)
  run_index = Spec::MSRunIndex.new(file)
  run_index.scans_by_num.map { |scan| scan ? scan.prec_mz : nil }
end
|
|
297
|
-
|
|
298
|
-
# Returns array where array[scan_num] = precursorMz
# Parent scans are not arrayed
# Values are strings. Array index likely starts at 1!
# parse_type = "regex" | "rexml" | "xmlparser"
# (an unrecognized parse_type prints a notice and returns nil)
# also takes a MSRunIndex file (terminates with '.timeIndex')
# also takes .RAW or .raw files and converts them to mzXML using
# Spec::MzXML::MZXML_CONVERTER
# also takes a file without an extension, in which case tests to see if the
# index file exists, then the .mzXML file, then .RAW/.raw (and converts)
def precursor_mz_by_scan(file, parse_type=nil)
  # If given a time index file (or one exists next to the given name):

  if File.exist?(file + '.timeIndex')
    return get_prec_mz_by_scan_for_time_index(file + '.timeIndex')
  elsif File.exist?(file + '.mzXML.timeIndex')
    return get_prec_mz_by_scan_for_time_index(file + '.mzXML.timeIndex')
  elsif file =~ /\.timeIndex$/
    return get_prec_mz_by_scan_for_time_index(file)
  end

  file = Spec::MzXML.file_to_mzxml(file)

  parse_type ||= default_parser
  case parse_type
  when "xmlparser"
    ##XMLParser:
    parser = Spec::MzXML::XMLParser::PrecMzByNum.new
    File.open(file) do |fh|
      parser.parse(fh.read)
    end
    parser.prec_mz
  when "regex"
    Spec::MzXML::Regexp.precursor_mz_by_scan(file)
  when "rexml"
    listener = Spec::MzXML::REXMLStreamListener::PrecMzByNum.new
    # Block form so the handle is closed when streaming finishes (or fails);
    # a bare File.new here was never closed.
    File.open(file) { |fh| REXML::Document.parse_stream(fh, listener) }
    listener.prec_mz
  else
    puts "Don't recognize parse_type: #{parse_type}"
  end
end
|
|
339
|
-
|
|
340
|
-
# Returns a hash of basic info on an mzXML run:
#   *mzXML_elemt*    *hash keys (symbols)*
#   scanCount        scan_count
#   startTime        start_time
#   endTime          end_time
#   startMz          start_mz
#   endMz            end_mz
# plus :ms_level, the msLevel of the first scan found. :start_mz and
# :end_mz are only set when that level is 1.
# :scan_count is an array: the file's scanCount attribute followed by the
# per-level tallies (levels 1..5) up to the first zero entry.
# NOTE(review): the first scan is always tallied under level 1, whatever
# its msLevel - confirm whether that is intended.
def basic_info(mzxml_file)
  puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
  hash = {}
  # index 0 = scanCount attribute, indices 1..5 = scans seen per ms level
  scan_count_tmp = Array.new(6, 0)
  # Block form: the handle is closed even when a lookup below raises
  # (e.g. a missing startTime attribute), which previously leaked it.
  File.open(mzxml_file) do |fh|
    @fh = fh   # _el reads from @fh / @line
    @line = ""
    scan_count_tmp[0] = _el("scanCount").to_i
    hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
    hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
    hash[:ms_level] = _el("msLevel").to_i
    scan_count_tmp[1] = 1
    if hash[:ms_level] == 1
      hash[:start_mz] = _el("startMz").to_f
      hash[:end_mz] = _el("endMz").to_f
    end

    while !fh.eof?
      # step past the line that matched last time before searching again
      @line = fh.readline
      ms_level = _el("msLevel")
      break unless ms_level
      scan_count_tmp[ms_level.to_i] += 1
    end
  end
  hash[:scan_count] = scan_count_tmp.take_while { |cnt| cnt != 0 }
  hash
end
|
|
385
|
-
|
|
386
|
-
# returns [start_mz, end_mz] of the first full scan (ms_level == 1),
# or [nil, nil] when the file has no scan with msLevel 1
def start_and_end_mz(mzxml_file)
  # Block form so the handle is closed on every exit path.
  File.open(mzxml_file) do |fh|
    @fh = fh   # _el reads from @fh / @line
    @line = ""
    loop do
      level = _el("msLevel")
      # nil: _el reached the end of the file without another match
      return [nil, nil] unless level
      break if level.to_i == 1
      # Discard the line that just matched. Otherwise _el keeps returning the
      # same non-MS1 value and this loop never terminates.
      @line = ""
    end
    [_el("startMz").to_f, _el("endMz").to_f]
  end
end

# Line-oriented attribute lookup: starting with the current @line, reads
# lines from @fh until one contains name="...", and returns the text between
# the quotes. Returns nil if the end of the file is reached without a match.
# The matching line stays in @line, so further attributes on the same line
# can be looked up next.
def _el(name)
  # [^"]* stops at the closing quote; a greedy .* ran on to the last quote
  # of the line and returned trailing attributes as part of the value.
  re = /#{name}="([^"]*)"/
  @line = @fh.readline while @line !~ re && !@fh.eof?
  # $1 reflects the final match attempt of the loop condition above
  $1 ? $1.dup : nil
end
|
|
411
|
-
|
|
412
|
-
end
|
|
413
|
-
|
|
414
|
-
# Event handler that builds scans_by_num: an array in which index n holds a
# Spec::Scan (num, ms level, retention time as a Float with the PT...S
# wrapper removed) for the <scan> numbered n. For scans carrying a
# <precursorMz>, prec_inten (Float) and prec_mz (the element text, a String)
# are filled in as well.
#
# Inherits from $XMLParserClass like the other handlers in this file, so the
# file still loads when the XMLParser constant is not available.
class Spec::MzXML::XMLParser::TimeMzIntenIndexer < $XMLParserClass

  attr_accessor :scans_by_num

  def initialize
    @current_scan = nil
    @scans_by_num = []
    # Per-instance parse state (previously class variables shared by every
    # parser object).
    @get_data = false
    @prec_text = nil
  end

  def startElement(name, attrs)
    if name == "scan"
      num = attrs["num"].to_i
      @current_scan = Spec::Scan.new(num, attrs["msLevel"].to_i, attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f)
      scans_by_num[num] = @current_scan
    elsif name == "precursorMz"
      @current_scan.prec_inten = attrs["precursorIntensity"].to_f
      @prec_text = nil   # a new precursor value starts here
      @get_data = true
    end
  end

  def endElement(name)
    @get_data = false if name == "precursorMz"
  end

  # Text may be delivered over several calls; the pieces are joined so
  # prec_mz ends up holding the whole value rather than only the last piece.
  def character(data)
    return unless @get_data
    @prec_text = @prec_text ? @prec_text + data : data
    @current_scan.prec_mz = @prec_text
  end

end
|
|
449
|
-
|
data/lib/spec/scan.rb
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module Spec; end

# One scan of a run: its number, ms level and time, plus (optionally) the
# precursor m/z, precursor intensity and a reference to its parent scan.
class Spec::Scan

  attr_accessor :time, :ms_level, :num, :prec_mz, :prec_inten, :parent

  # The three optional trailing values are only stored when given
  # (i.e. when they are neither nil nor false).
  def initialize(num=nil, ms_level=nil, time=nil, prec_mz=nil, prec_inten=nil, parent=nil)
    @num, @ms_level, @time = num, ms_level, time
    @prec_mz = prec_mz if prec_mz
    @prec_inten = prec_inten if prec_inten
    @parent = parent if parent
  end

  def to_s
    "<Scan num=#{@num} ms_level=#{@ms_level} time=#{@time}>"
  end

  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
  def to_index_file_string
    fields = [@ms_level, @num, @time]
    fields << @prec_mz if prec_mz
    fields << @prec_inten if prec_inten
    fields.join(" ")
  end

  # adds the attribute parent to each scan with a parent
  # (level 1 = no parent; level 2 = prev level 1, etc.)
  # nil entries in scans are skipped; returns the scans array itself.
  def self.add_parent_scan(scans)
    ## the starting level is that of the first scan we come to
    first_scan = scans.find { |candidate| candidate }
    last_level = first_scan ? first_scan.ms_level : 1
    last_scan = nil
    ancestors = [nil]   # front = parent for the current level
    scans.each do |scan|
      next unless scan ## the first one is nil, (others?)
      level = scan.ms_level
      if last_level < level
        # went deeper: the previous scan becomes the parent
        ancestors.unshift(last_scan)
      elsif last_level > level
        # came back up: drop one ancestor per level climbed
        ancestors.shift(last_level - level)
      end
      scan.parent = ancestors.first
      last_level = level
      last_scan = scan
    end
  end

end
|
|
54
|
-
|
|
55
|
-
|