mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
|
@@ -1,253 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'xml_style_parser'
|
|
3
|
-
require 'ms/spectrum'
|
|
4
|
-
require 'ms/scan'
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class MS::Parser::MzXML::Hpricot
|
|
8
|
-
include XMLStyleParser
|
|
9
|
-
include MS::Parser::MzXML
|
|
10
|
-
|
|
11
|
-
@@scan_atts = %w(num msLevel retentionTime startMz endMz precursor spectrum)
|
|
12
|
-
|
|
13
|
-
def initialize(parse_type=:msrun, version='1.0')
|
|
14
|
-
@method = parse_type
|
|
15
|
-
@version = version
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def new_scan_from_hash(node)
|
|
19
|
-
scan = MS::Scan.new # array class creates one with 9 positions
|
|
20
|
-
scan[0] = node['num'].to_i
|
|
21
|
-
scan[1] = node['msLevel'].to_i
|
|
22
|
-
scan[2] = node['retentionTime'][2...-1].to_f
|
|
23
|
-
if x = node['startMz']
|
|
24
|
-
scan[3] = x.to_f
|
|
25
|
-
scan[4] = node['endMz'].to_f
|
|
26
|
-
end
|
|
27
|
-
scan
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# takes a scan node and creates a scan object
|
|
31
|
-
# the parent scan is the one directly above it in mslevel
|
|
32
|
-
# if the
|
|
33
|
-
def create_scan(scan_n, scans_by_num, get_spectra=true)
|
|
34
|
-
if @version < '3.0'
|
|
35
|
-
scan = new_scan_from_hash(scan_n)
|
|
36
|
-
precs = []
|
|
37
|
-
scan_n.each_child do |node|
|
|
38
|
-
case node.name
|
|
39
|
-
when 'precursorMz'
|
|
40
|
-
# should be able to do this!!!
|
|
41
|
-
#scan[5] = scan_n.find('child::precursorMz').map do |prec_n|
|
|
42
|
-
prec = MS::Precursor.new
|
|
43
|
-
prec[1] = node['precursorIntensity'].to_f
|
|
44
|
-
prec[0] = node.content.to_f
|
|
45
|
-
if x = node['precursorScanNum']
|
|
46
|
-
prec[2] = scans_by_num[x.to_i]
|
|
47
|
-
end
|
|
48
|
-
precs << prec
|
|
49
|
-
when 'peaks'
|
|
50
|
-
next unless get_spectra
|
|
51
|
-
# SHOULD be able to do this!!
|
|
52
|
-
#peaks_n = scan_n.find_first('child::peaks')
|
|
53
|
-
scan[6] = MS::Spectrum.from_base64_peaks(node.content, node['precision'].to_i)
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
scan[5] = precs
|
|
57
|
-
scan
|
|
58
|
-
else # for version > 3.0
|
|
59
|
-
abort 'not supporting version 3.0 just yet'
|
|
60
|
-
# note that mzXML version 3.0 *can* have more than one peak...
|
|
61
|
-
# I'm not sure how to deal with that since I have one spectrum/scan
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
# returns an array of msrun objects
|
|
67
|
-
def msruns(file)
|
|
68
|
-
raise NotImplementedError
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# returns a string with double </scan></scan> tags into single and missing
|
|
72
|
-
# </scan> tags after peaks added in
|
|
73
|
-
# we do this in windows style since these are generated off a windows
|
|
74
|
-
# machine only
|
|
75
|
-
def fix_bad_scan_tags(file)
|
|
76
|
-
IO.read(file).gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# right now cannot parse multiple runs out of an mzXML version 2 file since
|
|
80
|
-
# this is built around a single run per file
|
|
81
|
-
# OPTIONS:
|
|
82
|
-
# :msrun => MSRun # use this object instead of creating one
|
|
83
|
-
# :spectra => *true|false # if false don't get spectra
|
|
84
|
-
def msrun(file, opts={})
|
|
85
|
-
unless opts.key?(:spectra)
|
|
86
|
-
opts[:spectra] = true
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
msrun_obj =
|
|
90
|
-
if x = opts[:msrun]
|
|
91
|
-
msrun_obj = x
|
|
92
|
-
else
|
|
93
|
-
MS::MSRun.new
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
doc = File.open(file) {|fh| ::Hpricot.XML(fh) }
|
|
97
|
-
#if @version == '2.0'
|
|
98
|
-
# # may not be necessary in hpricot!
|
|
99
|
-
# #string = fix_bad_scan_tags(file)
|
|
100
|
-
# #XML::Parser.string(string).parse
|
|
101
|
-
#else
|
|
102
|
-
# XML::Document.file(file)
|
|
103
|
-
#end
|
|
104
|
-
msrun_n = doc.at('msRun')
|
|
105
|
-
|
|
106
|
-
## HEADER
|
|
107
|
-
scan_count = msrun_n['scanCount'].to_i
|
|
108
|
-
msrun_obj.scan_count = scan_count
|
|
109
|
-
scans_by_num = Array.new(scan_count + 1)
|
|
110
|
-
|
|
111
|
-
## SPECTRUM
|
|
112
|
-
parent = nil
|
|
113
|
-
scans = Array.new( scan_count )
|
|
114
|
-
scn_index = 0
|
|
115
|
-
|
|
116
|
-
# we should be able to do this, but it's not working!!!
|
|
117
|
-
#scan_n = msrun_n.find_first('scan')
|
|
118
|
-
#while (scn_index < scan_count)
|
|
119
|
-
get_spectra = opts[:spectra]
|
|
120
|
-
|
|
121
|
-
msrun_n.each_child do |scan_n|
|
|
122
|
-
p scan_n
|
|
123
|
-
abort
|
|
124
|
-
|
|
125
|
-
next unless scan_n.name == 'scan'
|
|
126
|
-
scan = create_scan(scan_n, scans_by_num, get_spectra)
|
|
127
|
-
scans[scn_index] = scan
|
|
128
|
-
sc = scan_n.next
|
|
129
|
-
scans_by_num[scan[0]] = scan
|
|
130
|
-
scn_index += 1
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
## update the scan's parents
|
|
135
|
-
MS::MSRun.add_parent_scan(scans)
|
|
136
|
-
|
|
137
|
-
# note that startTime and endTime are optional AND in >2.2 are dateTime
|
|
138
|
-
# instead of duration types!, so we will just use scan times...
|
|
139
|
-
# Also, note that startTime and endTime are BROKEN on readw -> mzXML 2.0
|
|
140
|
-
# export. They give the start and end time in seconds, but they are
|
|
141
|
-
# really minutes. All the more reason to use the first and last scans!
|
|
142
|
-
msrun_obj.start_time = scans.first.time
|
|
143
|
-
msrun_obj.end_time = scans.last.time
|
|
144
|
-
|
|
145
|
-
msrun_obj.scans = scans
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
=begin
|
|
153
|
-
## THIS IS THE SAX PARSER VERSION. IT NEEDS A BIT OF BRUSH UP AND IT WOULD
|
|
154
|
-
## WORK. I THINK THE default guy is probably faster
|
|
155
|
-
|
|
156
|
-
def msrun(file, msrun_obj)
|
|
157
|
-
# Figure out where the first scan is at in the file:
|
|
158
|
-
pos_after_first_scan = nil
|
|
159
|
-
File.open(file) do |fh|
|
|
160
|
-
fh.each do |line|
|
|
161
|
-
if line =~ /<scan/
|
|
162
|
-
pos_after_first_scan = fh.pos
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
# Get only the header:
|
|
168
|
-
header_string = IO.read(file, pos_after_first_scan)
|
|
169
|
-
|
|
170
|
-
@msrun_obj = msrun_obj
|
|
171
|
-
# Parse out the header info:
|
|
172
|
-
parser = XML::SaxParser.new
|
|
173
|
-
parser.string = header_string
|
|
174
|
-
parser.on_start_element do |name, attrs|
|
|
175
|
-
if name == 'msRun'
|
|
176
|
-
@msrun_obj.scan_count = attrs['scanCount'].to_i
|
|
177
|
-
@msrun_obj.start_time = attrs['startTime'][2...-1].to_f
|
|
178
|
-
@msrun_obj.end_time = attrs['endTime'][2...-1].to_f
|
|
179
|
-
end
|
|
180
|
-
end
|
|
181
|
-
parser.parse
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
# Parse the scans out:
|
|
185
|
-
scan_st = 'scan'
|
|
186
|
-
prec_st = 'precursorMz'
|
|
187
|
-
peaks_st = 'peaks'
|
|
188
|
-
prec_inten_st = 'precursorIntensity'
|
|
189
|
-
precision_st = 'precision'
|
|
190
|
-
|
|
191
|
-
#parser = MS::Parser::MzXML::Hpricot::SaxParser::MSRun.new
|
|
192
|
-
parser = XML::SaxParser.new
|
|
193
|
-
parser.filename = file
|
|
194
|
-
parser.on_start_document do
|
|
195
|
-
@scans = []
|
|
196
|
-
@current_scan = nil
|
|
197
|
-
@get_peaks = false
|
|
198
|
-
@get_prec_mz = false
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
parser.on_characters do |chars|
|
|
202
|
-
if @get_peaks
|
|
203
|
-
@get_peaks << chars
|
|
204
|
-
elsif @get_prec_mz
|
|
205
|
-
@get_prec_mz << chars
|
|
206
|
-
end
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
parser.on_end_element do |el|
|
|
210
|
-
case el
|
|
211
|
-
when 'peaks'
|
|
212
|
-
@current_scan.spectrum = Spectrum.from_base64_peaks(@get_peaks, @precision, true)
|
|
213
|
-
@get_peaks = false
|
|
214
|
-
when 'precursorMz'
|
|
215
|
-
@current_scan[5] = [Precursor.new([@get_prec_mz.to_f])]
|
|
216
|
-
@get_prec_mz = false
|
|
217
|
-
end
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
parser.on_start_element do |name, attr_hash|
|
|
221
|
-
case name
|
|
222
|
-
when scan_st
|
|
223
|
-
@current_scan = new_scan_from_hash(attr_hash)
|
|
224
|
-
sz = @scans.size
|
|
225
|
-
@scans << @current_scan
|
|
226
|
-
when prec_st
|
|
227
|
-
@current_scan[5].first[1] = attr_hash[prec_inten_st].to_f
|
|
228
|
-
@get_prec_mz = ''
|
|
229
|
-
when peaks_st
|
|
230
|
-
@precision = attr_hash[precision_st].to_i
|
|
231
|
-
case @version[0,1].to_ip
|
|
232
|
-
when 3
|
|
233
|
-
if ch['pairOrder'] != 'm/z-int' # only version 3.0 has others
|
|
234
|
-
abort "cannot yet read anything but 'm/z-int' pair order"
|
|
235
|
-
end
|
|
236
|
-
end
|
|
237
|
-
@get_peaks = ''
|
|
238
|
-
end
|
|
239
|
-
end
|
|
240
|
-
parser.parse
|
|
241
|
-
|
|
242
|
-
@msrun_obj.scans = @scans
|
|
243
|
-
@msrun_obj.scans.each_with_index do |sc,i|
|
|
244
|
-
if sc.spectrum.mz == nil
|
|
245
|
-
abort "INDEX: #{i}"
|
|
246
|
-
end
|
|
247
|
-
end
|
|
248
|
-
@msrun_obj
|
|
249
|
-
end
|
|
250
|
-
=end
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'ms/parser/mzxml/dom'
|
|
3
|
-
|
|
4
|
-
class MS::Parser::MzXML::LibXML < MS::Parser::MzXML::DOM
|
|
5
|
-
def get_root_node_from_string(string)
|
|
6
|
-
XML::Parser.string(string).parse.root
|
|
7
|
-
end
|
|
8
|
-
def get_root_node_from_file(file)
|
|
9
|
-
XML::Parser.filename(file).parse.root
|
|
10
|
-
end
|
|
11
|
-
def get_root_node_from_io(io)
|
|
12
|
-
XML::Parser.io(io).parse.root
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
require 'strscan'
|
|
2
|
-
|
|
3
|
-
module MS::Parser::MzXML ; end
|
|
4
|
-
|
|
5
|
-
class MS::Parser::MzXML::Regexp
|
|
6
|
-
@@first_scan_regexp = /<scan /o
|
|
7
|
-
include MS::Parser::MzXML
|
|
8
|
-
|
|
9
|
-
def initialize(method=:msrun, version='1.0')
|
|
10
|
-
@method = method
|
|
11
|
-
@version = version
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def parse(file)
|
|
15
|
-
send(@method, file)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
# returns a MS::MsRun Object
|
|
19
|
-
def msrun(file)
|
|
20
|
-
fh = File.open(file)
|
|
21
|
-
get_header(fh)
|
|
22
|
-
|
|
23
|
-
fh.close
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
#def msrun(file, opts={})
|
|
27
|
-
#end
|
|
28
|
-
|
|
29
|
-
@@scan_re = /<scan.*?num="(\d+)"(.*?)<\/scan>/mo
|
|
30
|
-
def self.precursor_mz_and_intensity_by_scan(file)
|
|
31
|
-
prec_re = /msLevel="2".*?<precursorMz precursorIntensity="([\d\.]+)".*?>([\d\.]+)<\/precursorMz>/mo
|
|
32
|
-
self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures.reverse}
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# (array will likely start at 1!)
|
|
36
|
-
def self.by_scan_num(file, regex)
|
|
37
|
-
arr = []
|
|
38
|
-
File.open(file) do |fh|
|
|
39
|
-
string = fh.read
|
|
40
|
-
matches = string.scan(@@scan_re)
|
|
41
|
-
matches.each do |matched|
|
|
42
|
-
if inner_match = regex.match(matched[1])
|
|
43
|
-
index = matched[0].to_i
|
|
44
|
-
arr[index] = yield(inner_match)
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
arr
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Returns array where array[scan_num] = precursorMz
|
|
52
|
-
# Parent scans armme not arrayed
|
|
53
|
-
# Values are strings. Array index likely starts at 1!
|
|
54
|
-
# @TODO: replace the use of a yield block
|
|
55
|
-
def self.precursor_mz_by_scan(file)
|
|
56
|
-
prec_re = /msLevel="2".*?<precursorMz.*?>([\d\.]+)<\/precursorMz>/mo
|
|
57
|
-
self.by_scan_num(file, prec_re) {|match_obj| match_obj.captures[0]}
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class MS::Parser::MzXML::Regexp::MsRun
|
|
64
|
-
@@scan_count_regexp = /scanCount="(\d+)"/o
|
|
65
|
-
@@start_time_regexp = /startTime="PT([\d\.]+)S"/o
|
|
66
|
-
@@end_time_regexp = /endTime="PT([\d\.]+)S"/o
|
|
67
|
-
@@first_scan_regexp = /<scan /
|
|
68
|
-
|
|
69
|
-
def initialize(version='1.0')
|
|
70
|
-
@version = version
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
def parse(io, msrun_object)
|
|
74
|
-
atts = {}
|
|
75
|
-
[:scan_count, :start_time, :end_time].zip(get_header_info(io)) {|v,k| atts[k] = v }
|
|
76
|
-
###
|
|
77
|
-
# HERE <------------------------------------
|
|
78
|
-
abort "NEED TO FINISH WRITING SCANS EXTRACTOR!"
|
|
79
|
-
get_scans(io)
|
|
80
|
-
# HERE <------------------------------------
|
|
81
|
-
|
|
82
|
-
# set the attributes
|
|
83
|
-
atts.each do |k,v|
|
|
84
|
-
msrun_object.send(k,v)
|
|
85
|
-
end
|
|
86
|
-
# need to fill in the scan_counts array
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# assumes the attributes are each on a line
|
|
90
|
-
def get_scans(io)
|
|
91
|
-
io.each do |line|
|
|
92
|
-
end
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
# returns [total_num_scans, start_time, end_time] and positions the handle
|
|
96
|
-
# so that the next 'gets' will call a scan
|
|
97
|
-
def get_header_info(io)
|
|
98
|
-
scan_count = nil
|
|
99
|
-
start_time = nil
|
|
100
|
-
end_time = nil
|
|
101
|
-
|
|
102
|
-
previous_position = nil
|
|
103
|
-
io.each do |line|
|
|
104
|
-
if line =~ @@scan_count_regexp
|
|
105
|
-
scan_count = $1.dup
|
|
106
|
-
end
|
|
107
|
-
if line =~ @@start_time_regexp
|
|
108
|
-
start_time = $1.dup
|
|
109
|
-
end
|
|
110
|
-
if line =~ @@end_time_regexp
|
|
111
|
-
end_time = $1.dup
|
|
112
|
-
end
|
|
113
|
-
if line =~ @@first_scan_regexp
|
|
114
|
-
io.pos = previous_position
|
|
115
|
-
break
|
|
116
|
-
end
|
|
117
|
-
previous_position = io.pos
|
|
118
|
-
end
|
|
119
|
-
[scan_count, start_time, end_time]
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
end
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
require 'rexml/document'
|
|
2
|
-
require 'rexml/streamlistener'
|
|
3
|
-
|
|
4
|
-
module MS::Parser::MzXML::REXMLStreamListener; end
|
|
5
|
-
class MS::Parser::MzXML::REXMLStreamListener::PrecMzByNum; end
|
|
6
|
-
|
|
7
|
-
module REXMLStreamListenerHelper
|
|
8
|
-
def parse_and_report(file, const, report_method=:report)
|
|
9
|
-
listener = self.const_get(const).new
|
|
10
|
-
File.open(file) do |fh|
|
|
11
|
-
REXML::Document.parse_stream(fh, listener)
|
|
12
|
-
end
|
|
13
|
-
listener.send(report_method)
|
|
14
|
-
end
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
class MS::Parser::MzXML::REXML
|
|
18
|
-
include MS::Parser::MzXML
|
|
19
|
-
|
|
20
|
-
def initialize(version='1.0', method=:msrun)
|
|
21
|
-
@version = version
|
|
22
|
-
@method = parse_type
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# returns an array indexed by scan_num that gives the precursor_mz
|
|
26
|
-
def precursor_mz_by_scan(file, opts={})
|
|
27
|
-
parse_and_report(file, PrecMzByNum)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# for REXML
|
|
36
|
-
class MS::Parser::MzXML::REXML::PrecMzByNum
|
|
37
|
-
include REXML::StreamListener
|
|
38
|
-
|
|
39
|
-
attr_accessor :prec_mz
|
|
40
|
-
alias_method :report, :prec_mz
|
|
41
|
-
|
|
42
|
-
def initialize
|
|
43
|
-
@prec_mz = []
|
|
44
|
-
@scan_num = nil
|
|
45
|
-
@get_data = false
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def tag_start(name,attrs)
|
|
49
|
-
if name == "scan"
|
|
50
|
-
@scan_num = attrs["num"].to_i
|
|
51
|
-
elsif name == "precursorMz"
|
|
52
|
-
@get_data = true
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
def tag_end(name)
|
|
57
|
-
if name == "precursorMz"
|
|
58
|
-
@get_data = false
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def text(txt)
|
|
63
|
-
if @get_data
|
|
64
|
-
@prec_mz[@scan_num] = txt
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
require 'xmlparser_wrapper'
|
|
2
|
-
|
|
3
|
-
# this is the wrapper class
|
|
4
|
-
class MS::Parser::MzXML::XMLParser
|
|
5
|
-
include XMLStyleParser
|
|
6
|
-
include MS::Parser::MzXML
|
|
7
|
-
include XMLParserWrapper
|
|
8
|
-
|
|
9
|
-
def initialize(parse_type=:msrun, version='1.0')
|
|
10
|
-
@method = parse_type
|
|
11
|
-
@version = version
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
# returns: [times_arr, [m/z,inten,m/z,inten...]]
|
|
15
|
-
# where times are time strings (in seconds)
|
|
16
|
-
def times_and_spectra(file, opts={})
|
|
17
|
-
parse_and_report(file, 'TimesAndSpectra')
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
## IN PROGRESS ...
|
|
22
|
-
# opts is actually the msrun object that will be fleshed out in the parsing
|
|
23
|
-
def msrun(file, opts={})
|
|
24
|
-
p opts
|
|
25
|
-
fh = File.open(file)
|
|
26
|
-
reply = parse_and_report_io(fh, 'MsRunHeader')
|
|
27
|
-
p reply
|
|
28
|
-
abort
|
|
29
|
-
fh.close
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def prec_mz_by_scan_num(file, opts={})
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# could easily do this for all these guys
|
|
36
|
-
#def method_missing(*args)
|
|
37
|
-
# method = args.shift
|
|
38
|
-
# parse_and_report(
|
|
39
|
-
#end
|
|
40
|
-
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
class MS::Parser::MzXML::XMLParser::MsRunHeader < XMLParser
|
|
44
|
-
def initialize(version='1.0')
|
|
45
|
-
@version = version
|
|
46
|
-
@atts = []
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def startElement(name,attrs)
|
|
50
|
-
case name
|
|
51
|
-
when 'msRun'
|
|
52
|
-
@atts = attrs.values_at(%w(scanCount startTime endTime))
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
def endElement(name)
|
|
57
|
-
if name == 'dataProcessing'
|
|
58
|
-
done
|
|
59
|
-
reset
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
class MS::Parser::MzXML::XMLParser::Spectrum < XMLParser
|
|
65
|
-
@@scan_atts = %w(num msLevel retentionTime startMz endMz)
|
|
66
|
-
@@precursor_mz_atts = %w(precursorIntensity)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def initialize(version='1.0')
|
|
70
|
-
@version = version
|
|
71
|
-
@spectrum = []
|
|
72
|
-
@current_scan = nil
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def report
|
|
76
|
-
@spectrum
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def startElement(name,attrs)
|
|
80
|
-
if name == 'scan'
|
|
81
|
-
vals = attrs.values_at(@@scan_atts)
|
|
82
|
-
vals[2] = vals[2][2...-1].to_f #remove PT and trailing S
|
|
83
|
-
[0, 1].each do |i| vals[i] = vals[i].to_i end # num and ms_level
|
|
84
|
-
[3, 4].each do |i| vals[i] = vals[i].to_f end # start_mz and end_mz
|
|
85
|
-
@current_scan = MS::Scan.new(vals)
|
|
86
|
-
elsif name == 'precursorMz'
|
|
87
|
-
# 5, 6, 7 are the scans indices for prec_mz prec_inten and parent
|
|
88
|
-
@current_scan[6] = attrs['precursorIntensity'].to_f
|
|
89
|
-
@current_scan[5] = ''
|
|
90
|
-
@get_precursor_mz = true
|
|
91
|
-
elsif name == 'peaks'
|
|
92
|
-
@precision = attrs['precision'].to_i
|
|
93
|
-
@get_peaks = true
|
|
94
|
-
@current_peaks_string = ''
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def endElement(name)
|
|
99
|
-
if name == 'peaks'
|
|
100
|
-
@get_peaks = false
|
|
101
|
-
@spectrum << Spectrum.new(@current_peaks_string, @precision)
|
|
102
|
-
@spectrum.context = @current_scan
|
|
103
|
-
elsif name == 'precursorMz'
|
|
104
|
-
@current_scan[5] = @current_scan[5].to_f
|
|
105
|
-
@get_precursor_mz = false
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def character(data)
|
|
110
|
-
if @get_peaks
|
|
111
|
-
@current_peaks_string << data
|
|
112
|
-
elsif @get_precursor_mz
|
|
113
|
-
@current_scan[5] << data
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class MS::Parser::MzXML::XMLParser::PrecMzByNum < XMLParser
|
|
123
|
-
@scan_num = nil
|
|
124
|
-
@get_data = false
|
|
125
|
-
|
|
126
|
-
attr_accessor :prec_mz
|
|
127
|
-
alias_method :report, :prec_mz
|
|
128
|
-
|
|
129
|
-
def initialize
|
|
130
|
-
@prec_mz = []
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
def startElement(name,attrs)
|
|
134
|
-
if name == "scan"
|
|
135
|
-
@scan_num = attrs["num"].to_i
|
|
136
|
-
elsif name == "precursorMz"
|
|
137
|
-
@current_prec_mz = ""
|
|
138
|
-
@get_data = true
|
|
139
|
-
end
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def endElement(name)
|
|
143
|
-
if name == "precursorMz"
|
|
144
|
-
@get_data = false
|
|
145
|
-
@prec_mz[@scan_num] = @current_prec_mz.to_f
|
|
146
|
-
end
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
def character(data)
|
|
150
|
-
if @get_data
|
|
151
|
-
@current_prec_mz << data
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
end
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
=begin
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
# Returns parallel arrays (times, spectra) where each spectra is an array
|
|
162
|
-
# containing alternating mz and intensity (MS1 scans only)
|
|
163
|
-
# and times are strings with the time in seconds
|
|
164
|
-
class MS::Parser::MzXML::XMLParser::TimesAndSpectra < XMLParser
|
|
165
|
-
include MS::Parser::MzXML
|
|
166
|
-
@@get_data = false
|
|
167
|
-
@@get_peaks = false
|
|
168
|
-
@@precision = 32 # @TODO: set dynamic
|
|
169
|
-
|
|
170
|
-
attr_accessor :times, :spectra
|
|
171
|
-
def times_and_spectra
|
|
172
|
-
[@times, @spectra]
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
alias_method :report, :times_and_spectra
|
|
176
|
-
|
|
177
|
-
def initialize(ms_level=1)
|
|
178
|
-
@ms_level = "#{ms_level}"
|
|
179
|
-
@times = []
|
|
180
|
-
@spectra = []
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
def startElement(name,attrs)
|
|
184
|
-
if name == "scan" && attrs["msLevel"] == @ms_level
|
|
185
|
-
@times << attrs["retentionTime"][2...-1] # strip PT and S: "PTx.xxxxS"
|
|
186
|
-
@@get_peaks = true
|
|
187
|
-
elsif name == "peaks" && @@get_peaks
|
|
188
|
-
@@get_data = true
|
|
189
|
-
@data = ""
|
|
190
|
-
end
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
def character(data)
|
|
194
|
-
if @@get_data
|
|
195
|
-
@data << data
|
|
196
|
-
end
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
def endElement(name)
|
|
200
|
-
if name == "peaks" && @@get_peaks
|
|
201
|
-
@spectra << base64_peaks_to_array(@data, @@precision)
|
|
202
|
-
@@get_data = false
|
|
203
|
-
@@get_peaks = false
|
|
204
|
-
end
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
class MS::Parser::MzXML::XMLParser::TimeMzIntenIndexer < XMLParser
|
|
211
|
-
|
|
212
|
-
@@scan_num = nil
|
|
213
|
-
@@get_data = false
|
|
214
|
-
|
|
215
|
-
attr_accessor :scans_by_num
|
|
216
|
-
alias_method :report, :scans_by_num
|
|
217
|
-
|
|
218
|
-
def initialize
|
|
219
|
-
@current_scan = nil
|
|
220
|
-
@scans_by_num = []
|
|
221
|
-
end
|
|
222
|
-
|
|
223
|
-
def startElement(name,attrs)
|
|
224
|
-
if name == "scan"
|
|
225
|
-
num = attrs["num"].to_i
|
|
226
|
-
@current_scan = MS::Scan.new(num, attrs["msLevel"].to_i, attrs["retentionTime"].gsub(/^PT/,'').gsub(/S$/,'').to_f)
|
|
227
|
-
scans_by_num[num] = @current_scan
|
|
228
|
-
elsif name == "precursorMz"
|
|
229
|
-
@current_scan.prec_inten = attrs["precursorIntensity"].to_f
|
|
230
|
-
@@get_data = true
|
|
231
|
-
end
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
def endElement(name)
|
|
235
|
-
if name == "precursorMz"
|
|
236
|
-
@@get_data = false
|
|
237
|
-
end
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
def character(data)
|
|
241
|
-
if @@get_data
|
|
242
|
-
@current_scan.prec_mz = data
|
|
243
|
-
end
|
|
244
|
-
end
|
|
245
|
-
|
|
246
|
-
end
|
|
247
|
-
|
|
248
|
-
=end
|