mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/ms/parser/mzxml.rb
DELETED
|
@@ -1,282 +0,0 @@
|
|
|
1
|
-
require 'fileutils'
require 'tempfile'

require 'ms/msrun'
|
|
3
|
-
|
|
4
|
-
module MS; end
|
|
5
|
-
|
|
6
|
-
module MS::Parser::MzXML
|
|
7
|
-
Base_dir_for_parsers = 'ms/parser/mzxml'
|
|
8
|
-
# inherits XMLStyleParser and version
|
|
9
|
-
include MS::Parser
|
|
10
|
-
include XMLStyleParser
|
|
11
|
-
|
|
12
|
-
# warning: clobbers file unless a newfilename is provided!
|
|
13
|
-
# returns the output filename
|
|
14
|
-
# will fix any size file!
|
|
15
|
-
def self.fix_bad_scan_tags(filename, newfilename=nil)
|
|
16
|
-
|
|
17
|
-
out_io =
|
|
18
|
-
if newfilename
|
|
19
|
-
File.open(newfilename, 'w')
|
|
20
|
-
else
|
|
21
|
-
Tempfile.new(File.basename(filename))
|
|
22
|
-
end
|
|
23
|
-
File.open(filename) do |fh|
|
|
24
|
-
self.fix_bad_scan_tags_from_io(fh, out_io)
|
|
25
|
-
end
|
|
26
|
-
out_io.close
|
|
27
|
-
unless newfilename
|
|
28
|
-
FileUtils.mv out_io.path, filename
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# this is a memory efficient method to fix bad scan tags
|
|
33
|
-
# prints cleaned up file to out_io
|
|
34
|
-
# no effort is made to rewind the io objects, the user must do this if they
|
|
35
|
-
# plan to continue using these objects!
|
|
36
|
-
def self.fix_bad_scan_tags_from_io(io, out_io)
|
|
37
|
-
regexp = /<\/scan>/
|
|
38
|
-
end_scan_line = false
|
|
39
|
-
|
|
40
|
-
io.each("\n") do |line|
|
|
41
|
-
if end_scan_line && line =~ regexp
|
|
42
|
-
# two end scan lines! # don't print to out_io
|
|
43
|
-
end_scan_line = true
|
|
44
|
-
elsif line =~ regexp
|
|
45
|
-
out_io.print(line)
|
|
46
|
-
end_scan_line = true
|
|
47
|
-
else
|
|
48
|
-
out_io.print(line)
|
|
49
|
-
end_scan_line = false
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# returns a string with double </scan></scan> tags into single and missing
|
|
55
|
-
# </scan> tags after peaks added in
|
|
56
|
-
# we do this in windows style since these are generated off a windows
|
|
57
|
-
# machine only
|
|
58
|
-
#def self.fix_bad_scan_tags(string)
|
|
59
|
-
# string.gsub(/<\/scan>\s+<\/scan>/m, '</scan>').gsub(/<\/peaks>\s+<scan/m, "</peaks>\r\n </scan>\r\n <scan")
|
|
60
|
-
#end
|
|
61
|
-
|
|
62
|
-
# returns true if it has the bad tag
|
|
63
|
-
def self.has_bad_scan_tag_from_string?(string)
|
|
64
|
-
if string.match(/<\/scan>\s+<\/scan>/m)
|
|
65
|
-
true
|
|
66
|
-
else
|
|
67
|
-
false
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def self.has_bad_scan_tag?(filename)
|
|
72
|
-
File.open(filename) do |fh|
|
|
73
|
-
self.has_bad_scan_tag_from_io?(fh)
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
# very efficient algorithm to check for malformed xml typical of readw
|
|
78
|
-
# output. The extra closing scan tags come after the last ms/ms scan in a
|
|
79
|
-
# cycle rewinds the io after looking
|
|
80
|
-
def self.has_bad_scan_tag_from_io?(io)
|
|
81
|
-
seen_first_ms_level = false
|
|
82
|
-
seen_higher_ms_level = false
|
|
83
|
-
cur_ms_level = 0
|
|
84
|
-
found_double_end_tag = false
|
|
85
|
-
found_end_tag = false
|
|
86
|
-
io.each("\n") do |line|
|
|
87
|
-
if line =~ /<\/scan>/
|
|
88
|
-
if found_end_tag # already found one!
|
|
89
|
-
found_double_end_tag = true
|
|
90
|
-
break
|
|
91
|
-
end
|
|
92
|
-
found_end_tag = true
|
|
93
|
-
else
|
|
94
|
-
found_end_tag = false
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
if line =~ /msLevel="(\d+)"/
|
|
98
|
-
cur_ms_level = $1.dup
|
|
99
|
-
if seen_first_ms_level && seen_higher_ms_level && cur_ms_level == '1'
|
|
100
|
-
break
|
|
101
|
-
end
|
|
102
|
-
if cur_ms_level == '1'
|
|
103
|
-
seen_first_ms_level = true
|
|
104
|
-
elsif cur_ms_level == '2'
|
|
105
|
-
seen_higher_ms_level = true
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
io.rewind
|
|
110
|
-
found_double_end_tag
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
# returns a specific parser MS::Parser::MzXML::#{ParserType}
|
|
114
|
-
# based on choose_parser from xml_style_parser
|
|
115
|
-
def self.new(parse_type=:msrun, version='1.0', opts={})
|
|
116
|
-
special_subclass =
|
|
117
|
-
if opts[:lazy] == :io
|
|
118
|
-
'LazyPeaks'
|
|
119
|
-
else ; nil
|
|
120
|
-
end
|
|
121
|
-
@version = version
|
|
122
|
-
@method = parse_type
|
|
123
|
-
XMLStyleParser.require_parse_files(Base_dir_for_parsers)
|
|
124
|
-
parser_class = XMLStyleParser.choose_parser(self, parse_type, special_subclass)
|
|
125
|
-
parser = parser_class.new(parse_type, version)
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
# Returns an array of scans indexed by scan number
|
|
129
|
-
# NOTE that the first scan (zero indexed) will likely be nil!
|
|
130
|
-
# accepts an optional parse_type = 'xmlparser' | 'rexml'
|
|
131
|
-
def scans_by_num(mzXML_file, parse_type=nil)
|
|
132
|
-
unless parse_type
|
|
133
|
-
parse_type = default_parser
|
|
134
|
-
end
|
|
135
|
-
scans = []
|
|
136
|
-
case parse_type
|
|
137
|
-
when 'xmlparser'
|
|
138
|
-
parser = MS::MzXML::XMLParser::TimeMzIntenIndexer.new
|
|
139
|
-
parser.parse(IO.read(mzXML_file))
|
|
140
|
-
scans = parser.scans_by_num
|
|
141
|
-
when 'rexml' # use REXML
|
|
142
|
-
# This is really too slow for files of this size
|
|
143
|
-
doc = REXML::Document.new File.new(mzXML_file)
|
|
144
|
-
doc.elements.each('msRun/scan') do |scan|
|
|
145
|
-
rt = scan.attributes['retentionTime'] ## like PT0.154000S"
|
|
146
|
-
level = scan.attributes['msLevel']
|
|
147
|
-
to_print = []
|
|
148
|
-
prec_mz = nil
|
|
149
|
-
prec_int = nil
|
|
150
|
-
if level.to_i != 1
|
|
151
|
-
scan.elements.each("precursorMz") do |prec|
|
|
152
|
-
prec_mz = prec.text.to_f
|
|
153
|
-
prec_int = prec.attributes["precursorIntensity"].to_f
|
|
154
|
-
end
|
|
155
|
-
end
|
|
156
|
-
# remove the leading PT and trailing S on the retention time!
|
|
157
|
-
rt = rt[2...-1]
|
|
158
|
-
|
|
159
|
-
num = scan.attributes['num'].to_i
|
|
160
|
-
scans[num] = MS::Scan.new(num, scan.attributes['msLevel'].to_i, rt.to_f, prec_mz, prec_int)
|
|
161
|
-
end #doc.elements
|
|
162
|
-
else
|
|
163
|
-
throw ArgumentError, "invalid parse type: #{parse_type}"
|
|
164
|
-
end
|
|
165
|
-
## update the scans for parents
|
|
166
|
-
MS::Scan.add_parent_scan(scans)
|
|
167
|
-
scans
|
|
168
|
-
end
|
|
169
|
-
|
|
170
|
-
# Returns a Hash indexed by filename (with no extension) for a given path
|
|
171
|
-
# extension = glob (string) or regex
|
|
172
|
-
# The basename is given as: file.split('.').first
|
|
173
|
-
def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
|
|
174
|
-
hash = {}
|
|
175
|
-
Dir.chdir path do
|
|
176
|
-
files = []
|
|
177
|
-
if extension.class == String
|
|
178
|
-
files = Dir[extension]
|
|
179
|
-
elsif extension.class == Regexp
|
|
180
|
-
files = Dir.entries(".").find_all do |dir|
|
|
181
|
-
dir =~ extension
|
|
182
|
-
end
|
|
183
|
-
else
|
|
184
|
-
puts "extension: #{extension} not a String or Regexp!"
|
|
185
|
-
end
|
|
186
|
-
files.each do |file|
|
|
187
|
-
base = file.split('.').first
|
|
188
|
-
hash[base] = precursor_mz_by_scan(file, parse_type)
|
|
189
|
-
end
|
|
190
|
-
end
|
|
191
|
-
hash
|
|
192
|
-
end
|
|
193
|
-
|
|
194
|
-
# Returns hash where hash[scan_num] = [precursorMz, precursorIntensity]
|
|
195
|
-
# Parent scans are not hashed
|
|
196
|
-
# Keys and values are both strings
|
|
197
|
-
def precursor_mz_and_inten_by_scan(file)
|
|
198
|
-
# in progress
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
# Returns array where array[scan_num] = precursorMz
|
|
202
|
-
# precursorMz are Floats
|
|
203
|
-
# Array index likely starts at 1!
|
|
204
|
-
def precursor_mz_by_scan_num(file)
|
|
205
|
-
## THIS SHOULD BE CREATED IN specific XML LIBS
|
|
206
|
-
end
|
|
207
|
-
|
|
208
|
-
# Returns a hash of basic info on an mzXML run:
|
|
209
|
-
# *mzXML_elemt* *hash keys (symbols)*
|
|
210
|
-
# scanCount scan_count
|
|
211
|
-
# startTime start_time
|
|
212
|
-
# endTime end_time
|
|
213
|
-
# startMz start_mz
|
|
214
|
-
# endMz end_mz
|
|
215
|
-
def basic_info(mzxml_file)
|
|
216
|
-
puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
|
|
217
|
-
hash = {}
|
|
218
|
-
scan_count_tmp = []
|
|
219
|
-
(1..5).to_a.each do |n| scan_count_tmp[n] = 0 end
|
|
220
|
-
@fh = File.open(mzxml_file)
|
|
221
|
-
@line = ""
|
|
222
|
-
scan_count_tmp[0] = _el("scanCount").to_i
|
|
223
|
-
hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/,"").to_f
|
|
224
|
-
hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/,"").to_f
|
|
225
|
-
hash[:ms_level] = _el("msLevel").to_i
|
|
226
|
-
scan_count_tmp[1] = 1
|
|
227
|
-
if hash[:ms_level] == 1
|
|
228
|
-
hash[:start_mz] = _el("startMz").to_f
|
|
229
|
-
hash[:end_mz] = _el("endMz").to_f
|
|
230
|
-
end
|
|
231
|
-
|
|
232
|
-
while !@fh.eof?
|
|
233
|
-
@line = @fh.readline
|
|
234
|
-
ms_level = _el("msLevel")
|
|
235
|
-
if ms_level
|
|
236
|
-
scan_count_tmp[ms_level.to_i] += 1
|
|
237
|
-
else
|
|
238
|
-
break
|
|
239
|
-
end
|
|
240
|
-
end
|
|
241
|
-
scan_count = []
|
|
242
|
-
scan_count_tmp.each do |cnt|
|
|
243
|
-
if cnt != 0
|
|
244
|
-
scan_count.push cnt
|
|
245
|
-
else
|
|
246
|
-
break
|
|
247
|
-
end
|
|
248
|
-
end
|
|
249
|
-
hash[:scan_count] = scan_count
|
|
250
|
-
@fh.close
|
|
251
|
-
hash
|
|
252
|
-
end
|
|
253
|
-
|
|
254
|
-
# returns [start_mz, end_mz] of the first full scan (ms_level == 1)
|
|
255
|
-
def start_and_end_mz(mzxml_file)
|
|
256
|
-
@fh = File.open(mzxml_file)
|
|
257
|
-
ms_level = 0
|
|
258
|
-
@line = ""
|
|
259
|
-
while ms_level != 1
|
|
260
|
-
ms_level = _el("msLevel").to_i
|
|
261
|
-
end
|
|
262
|
-
start_mz = _el("startMz").to_f
|
|
263
|
-
end_mz = _el("endMz").to_f
|
|
264
|
-
@fh.close
|
|
265
|
-
[start_mz, end_mz]
|
|
266
|
-
end
|
|
267
|
-
|
|
268
|
-
def _el(name)
|
|
269
|
-
re = /#{name}="(.*)"/
|
|
270
|
-
while @line !~ re && !@fh.eof?
|
|
271
|
-
@line = @fh.readline
|
|
272
|
-
end
|
|
273
|
-
if $1
|
|
274
|
-
return $1.dup
|
|
275
|
-
else
|
|
276
|
-
return nil
|
|
277
|
-
end
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
end
|
|
281
|
-
|
|
282
|
-
|
data/lib/ms/parser.rb
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
require 'xml_style_parser'
|
|
2
|
-
|
|
3
|
-
module MS; end
|
|
4
|
-
|
|
5
|
-
module MS::Parser
|
|
6
|
-
# inherits attr_accessor :method, :default_parser, and parse (which should
|
|
7
|
-
# be overridden)
|
|
8
|
-
include XMLStyleParser
|
|
9
|
-
|
|
10
|
-
Mzxml_regexp = /http:\/\/sashimi.sourceforge.net\/schema(_revision)?\/([\w\d_\.]+)/o
|
|
11
|
-
# 'http://sashimi.sourceforge.net/schema/MsXML.xsd' # version 1
|
|
12
|
-
# 'http://sashimi.sourceforge.net/schema_revision/mzXML_X.X' # others
|
|
13
|
-
Mzdata_regexp = /<mzData.*version="([\d\.]+)"/m
|
|
14
|
-
|
|
15
|
-
attr_accessor :version
|
|
16
|
-
|
|
17
|
-
############################################
|
|
18
|
-
# POINTERS (to create META MAGIC)
|
|
19
|
-
############################################
|
|
20
|
-
|
|
21
|
-
@@filetypes_to_upcase = {
|
|
22
|
-
:mzxml => 'MzXML',
|
|
23
|
-
:mzdata => 'MzData',
|
|
24
|
-
:mzml => 'MzML',
|
|
25
|
-
:raw => 'Raw',
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
@@filetypes_to_require = {}
|
|
29
|
-
@@filetypes_to_constant = {}
|
|
30
|
-
|
|
31
|
-
abbrevs = Dir.chdir(File.dirname(__FILE__) + "/parser") do
|
|
32
|
-
Dir["*.rb"].map {|f| f.sub(/\.rb$/,'') }
|
|
33
|
-
end
|
|
34
|
-
abbrevs.each do |abbr|
|
|
35
|
-
abb = abbr.to_sym
|
|
36
|
-
req = ['ms', 'parser', abbr].join("/")
|
|
37
|
-
@@filetypes_to_require[abb] = req
|
|
38
|
-
@@filetypes_to_constant[abb] = ['MS', 'Parser', @@filetypes_to_upcase[abb]].join("::")
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
############################################
|
|
42
|
-
# END POINTERS
|
|
43
|
-
############################################
|
|
44
|
-
|
|
45
|
-
# finds the filetype of a file (expects to be at the beginning) and rewinds
|
|
46
|
-
# the filehandle to the beginning returns [filetype, version]. nil if
|
|
47
|
-
# filetype and version could not be determined
|
|
48
|
-
def self.filetype_and_version(fh_or_filename)
|
|
49
|
-
if fh_or_filename.is_a? IO
|
|
50
|
-
fh = fh_or_filename
|
|
51
|
-
found = nil
|
|
52
|
-
# Test for RAW file:
|
|
53
|
-
header = fh.read(18).unpack('@2axaxaxaxaxaxaxa').join
|
|
54
|
-
if header == 'Finnigan'
|
|
55
|
-
return [:raw, nil]
|
|
56
|
-
end
|
|
57
|
-
fh.rewind
|
|
58
|
-
while (line = fh.gets)
|
|
59
|
-
found =
|
|
60
|
-
case line
|
|
61
|
-
when Mzxml_regexp
|
|
62
|
-
mtch = $2.dup
|
|
63
|
-
case mtch
|
|
64
|
-
when /mzXML_([\d\.]+)/
|
|
65
|
-
[:mzxml, $1.dup]
|
|
66
|
-
when /MsXML/
|
|
67
|
-
[:mzxml, '1.0']
|
|
68
|
-
else
|
|
69
|
-
abort "Cannot determine mzXML version!"
|
|
70
|
-
end
|
|
71
|
-
when Mzdata_regexp
|
|
72
|
-
[:mzdata, $1.dup]
|
|
73
|
-
end
|
|
74
|
-
if found
|
|
75
|
-
break
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
fh.rewind
|
|
79
|
-
found
|
|
80
|
-
else
|
|
81
|
-
File.open(fh_or_filename) do |fh|
|
|
82
|
-
filetype_and_version(fh)
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
# filetype_version is an example file to parse, or it is an array: [type, version].
|
|
88
|
-
# parse_type is the information to be gleaned (as symbol).
|
|
89
|
-
def self.new(filetype_version, parse_type, opts={})
|
|
90
|
-
unless filetype_version.is_a? Array
|
|
91
|
-
filetype_version = filetype_and_version(filetype_version)
|
|
92
|
-
end
|
|
93
|
-
require_and_create_parser(filetype_version, parse_type, opts)
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
private
|
|
97
|
-
|
|
98
|
-
# returns a working parser.
|
|
99
|
-
def self.require_and_create_parser(filetype_version, parse_type, opts)
|
|
100
|
-
(filetype, version) = filetype_version
|
|
101
|
-
#puts "FT: #{filetype} VERSION: #{version}"
|
|
102
|
-
reply = require @@filetypes_to_require[filetype]
|
|
103
|
-
@@filetypes_to_require[filetype]
|
|
104
|
-
parser_class = MS::Parser.const_get(@@filetypes_to_upcase[filetype])
|
|
105
|
-
parser_class.new(parse_type, version, opts)
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
end
|
data/lib/ms/precursor.rb
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
require 'arrayclass'
|
|
2
|
-
|
|
3
|
-
module MS; end
|
|
4
|
-
|
|
5
|
-
# charge_states are the possible charge states of the precursor
|
|
6
|
-
# parent references a scan
|
|
7
|
-
# 0 1 2 3
|
|
8
|
-
MS::Precursor = Arrayclass.new(%w(mz intensity parent charge_states))
|
|
9
|
-
|
|
10
|
-
class MS::Precursor
|
|
11
|
-
|
|
12
|
-
undef :intensity
|
|
13
|
-
|
|
14
|
-
def intensity
|
|
15
|
-
if self[1].nil?
|
|
16
|
-
if s = self[2].spectrum
|
|
17
|
-
self[1] = s.intensity_at_mz(self[0])
|
|
18
|
-
else
|
|
19
|
-
nil # if we didn't read in the spectra, we can't get this value!
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
self[1]
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
end
|
data/lib/ms/scan.rb
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
require 'arrayclass'
|
|
2
|
-
require 'ms/precursor'
|
|
3
|
-
|
|
4
|
-
module MS ; end
|
|
5
|
-
|
|
6
|
-
# 0 1 2 3 4 5 6
|
|
7
|
-
MS::Scan = Arrayclass.new( %w(num ms_level time start_mz end_mz precursor spectrum) )
|
|
8
|
-
|
|
9
|
-
# time in seconds
|
|
10
|
-
# everything else in float/int
|
|
11
|
-
|
|
12
|
-
class MS::Scan
|
|
13
|
-
#@@order = %w(num ms_level time start_mz end_mz prec_mz prec_inten parent spectrum)
|
|
14
|
-
#attr_accessor :num, :ms_level, :time, :start_mz, :end_mz, :prec_mz, :prec_inten, :parent, :spectrum
|
|
15
|
-
|
|
16
|
-
#def initialize(ar=nil)
|
|
17
|
-
# @@order.zip(ar) do |x,v|
|
|
18
|
-
# send((x+'=').to_sym, v)
|
|
19
|
-
# end
|
|
20
|
-
#end
|
|
21
|
-
|
|
22
|
-
def to_s
|
|
23
|
-
"<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
undef_method :inspect
|
|
27
|
-
def inspect
|
|
28
|
-
atts = %w(num ms_level time start_mz end_mz)
|
|
29
|
-
display = atts.map do |att|
|
|
30
|
-
if val = send(att.to_sym)
|
|
31
|
-
"#{att}=#{val}"
|
|
32
|
-
else
|
|
33
|
-
nil
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
display.compact!
|
|
37
|
-
spec_display =
|
|
38
|
-
if spectrum
|
|
39
|
-
spectrum.mzs.size
|
|
40
|
-
else
|
|
41
|
-
'nil'
|
|
42
|
-
end
|
|
43
|
-
"<MS::Scan:#{__id__} " + display.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
|
|
47
|
-
def to_index_file_string
|
|
48
|
-
arr = [ms_level, num, time]
|
|
49
|
-
if precursor then arr << precursor.mz end
|
|
50
|
-
if x = precursor.intensity then arr << x end
|
|
51
|
-
arr.join(" ")
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# adds the attribute parent to each scan with a parent
|
|
55
|
-
# (level 1 = no parent; level 2 = prev level 1, etc.
|
|
56
|
-
def self.add_parent_scan(scans)
|
|
57
|
-
prev_scan = nil
|
|
58
|
-
parent_stack = [nil]
|
|
59
|
-
## we want to set the level to be the first mslevel we come to
|
|
60
|
-
prev_level = 1
|
|
61
|
-
scans.each do |scan|
|
|
62
|
-
if scan then prev_level = scan.ms_level; break; end
|
|
63
|
-
end
|
|
64
|
-
scans.each do |scan|
|
|
65
|
-
next unless scan ## the first one is nil, (others?)
|
|
66
|
-
level = scan.ms_level
|
|
67
|
-
if prev_level < level
|
|
68
|
-
parent_stack.unshift prev_scan
|
|
69
|
-
end
|
|
70
|
-
if prev_level > level
|
|
71
|
-
(prev_level - level).times do parent_stack.shift end
|
|
72
|
-
end
|
|
73
|
-
scan.parent = parent_stack.first
|
|
74
|
-
prev_level = level
|
|
75
|
-
prev_scan = scan
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
|
data/lib/mspire.rb
DELETED