mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -1,109 +1,77 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
module
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
# if .mzXML
|
|
16
|
-
#
|
|
17
|
-
# if
|
|
18
|
-
#
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
file
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
cmd = "#{converter} #{basename}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
return file_to_mzxml(file + '.
|
|
51
|
-
elsif File.exist?( file + '.
|
|
52
|
-
return file_to_mzxml(file + '.
|
|
53
|
-
|
|
54
|
-
return
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
ndata
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
# takes a base64 peaks string and returns an array of alternating m/z and
|
|
82
|
-
# intensity mzXML as network ordered
|
|
83
|
-
def base64_peaks_to_array(string, precision=32)
|
|
84
|
-
b64d = Base64.decode64(string)
|
|
85
|
-
if precision == 32
|
|
86
|
-
unpack_code = "g*"
|
|
87
|
-
elsif precision == 64
|
|
88
|
-
unpack_code = "G*"
|
|
89
|
-
end
|
|
90
|
-
b64d.unpack(unpack_code)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Searches each path element and returns the first one it finds
|
|
94
|
-
# returns nil if none found
|
|
95
|
-
def self.find_mzxml_converter
|
|
96
|
-
ENV['PATH'].split(/[:;]/).each do |path|
|
|
97
|
-
Dir.chdir(path) do
|
|
98
|
-
Potential_mzxml_converters.each do |pc|
|
|
99
|
-
if File.exist? pc
|
|
100
|
-
return File.join(path, pc)
|
|
101
|
-
end
|
|
102
|
-
end
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
nil
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
end
|
|
1
|
+
|
|
2
|
+
module MS ; end
|
|
3
|
+
module MS::Converter ; end
|
|
4
|
+
module MS::Converter::MzXML
|
|
5
|
+
Potential_mzxml_converters = %w(readw.exe readw t2x)
|
|
6
|
+
|
|
7
|
+
# takes PT2.7500000S and returns it as 2.7500000 (no PT or S)
|
|
8
|
+
#def strip_time(time)
|
|
9
|
+
# return time[2...-1]
|
|
10
|
+
#end
|
|
11
|
+
|
|
12
|
+
# first, converts backslash to forward slash in filename.
|
|
13
|
+
# if .mzXML returns the filename
|
|
14
|
+
# if .raw or .RAW converts the file to .mzXML and returns the mzXML filename
|
|
15
|
+
# if no recognized extension, looks for .mzXML file, then .RAW file (and
|
|
16
|
+
# converts)
|
|
17
|
+
# aborts if file was not able to be converted
|
|
18
|
+
# returns nil if a file that can be converted or used was not found
|
|
19
|
+
def self.file_to_mzxml(file)
|
|
20
|
+
file.gsub!("\\",'/')
|
|
21
|
+
old_file = file.dup
|
|
22
|
+
if file =~ /\.mzXML$/
|
|
23
|
+
return file
|
|
24
|
+
elsif file =~ /\.RAW$/i
|
|
25
|
+
old_file = file.dup
|
|
26
|
+
## t2x outputs in cwd (so go to the directory of the file!)
|
|
27
|
+
dir = File.dirname(file)
|
|
28
|
+
basename = File.basename(file)
|
|
29
|
+
converter = MS::MzXML.find_mzxml_converter
|
|
30
|
+
Dir.chdir(dir) do
|
|
31
|
+
if converter =~ /readw/
|
|
32
|
+
cmd = "#{converter} #{basename} c #{basename.sub(/\.RAW$/i, '.mzXML')}"
|
|
33
|
+
else
|
|
34
|
+
cmd = "#{converter} #{basename}"
|
|
35
|
+
end
|
|
36
|
+
#puts cmd
|
|
37
|
+
#puts `#{cmd}`
|
|
38
|
+
reply = `#{cmd}`
|
|
39
|
+
puts reply if $VERBOSE
|
|
40
|
+
end
|
|
41
|
+
file.sub!(/\.RAW$/i, '.mzXML')
|
|
42
|
+
unless File.exist? file
|
|
43
|
+
abort "Couldn't convert #{old_file} to #{file}"
|
|
44
|
+
end
|
|
45
|
+
return file
|
|
46
|
+
else
|
|
47
|
+
if File.exist?( file + '.mzXML' )
|
|
48
|
+
return file_to_mzxml(file + '.mzXML')
|
|
49
|
+
elsif File.exist?( file + '.RAW' )
|
|
50
|
+
return file_to_mzxml(file + '.RAW')
|
|
51
|
+
elsif File.exist?( file + '.raw' )
|
|
52
|
+
return file_to_mzxml(file + '.raw')
|
|
53
|
+
else
|
|
54
|
+
return nil
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Searches each path element and returns the first one it finds
|
|
62
|
+
# returns nil if none found
|
|
63
|
+
def self.find_mzxml_converter
|
|
64
|
+
ENV['PATH'].split(/[:;]/).each do |path|
|
|
65
|
+
Dir.chdir(path) do
|
|
66
|
+
Potential_mzxml_converters.each do |pc|
|
|
67
|
+
if File.exist? pc
|
|
68
|
+
return File.join(path, pc)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
nil
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
end
|
|
77
|
+
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
require 'array_class'
|
|
2
|
+
|
|
3
|
+
# This is modeled after the Thermo gradient
|
|
4
|
+
class GradientProgram
|
|
5
|
+
attr_accessor :time_points
|
|
6
|
+
attr_accessor :pump_type
|
|
7
|
+
# array of solvents parallel to TimePoint percentages array
|
|
8
|
+
attr_accessor :solvents
|
|
9
|
+
|
|
10
|
+
def initialize(pump_type, time_points=[], solvents=[])
|
|
11
|
+
@pump_type = pump_type
|
|
12
|
+
@time_points = time_points
|
|
13
|
+
@solvents = solvents
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def ==(other)
|
|
17
|
+
self.class == other.class and @pump_type==other.pump_type and @solvents == other.solvents and @time_points == other.time_points
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# gets the first gradient program encountered in the filehandle
|
|
21
|
+
def self.get_gradient_program(fh)
|
|
22
|
+
thermo_newline = "\n\000"
|
|
23
|
+
#gtable = "g\000r\000a\000d\000i\000e\000n\000t\000 \000t\000a\000b\000l\000e"
|
|
24
|
+
gradient = "[Gg]\000r\000a\000d\000i\000e\000n\000t\000 \000"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
xcal_2x = gradient + "t\000a\000b\000l\000e\000:\000"
|
|
28
|
+
xcal_1x = gradient + "P\000r\000o\000g\000r\000a\000m\000:\000"
|
|
29
|
+
xcal_2x_regexp = Regexp.new(xcal_2x)
|
|
30
|
+
xcal_1x_regexp = Regexp.new(xcal_1x)
|
|
31
|
+
find_gtable_regexp = Regexp.new(gradient)
|
|
32
|
+
|
|
33
|
+
found_one_2x = false
|
|
34
|
+
found_one_1x = false
|
|
35
|
+
pump_type = ''
|
|
36
|
+
fh.each(thermo_newline) do |line|
|
|
37
|
+
# first identify the line, then
|
|
38
|
+
if line =~ find_gtable_regexp
|
|
39
|
+
if line =~ xcal_1x_regexp
|
|
40
|
+
pump_type = '' ## have to look way back in file for this
|
|
41
|
+
found_one_1x = true
|
|
42
|
+
break
|
|
43
|
+
elsif line =~ xcal_2x_regexp
|
|
44
|
+
grab_pump_type_regexp = /(.*) .g.r.a.d.i.e.n.t. .t.a.b.l.e/
|
|
45
|
+
pump_type = read_thermo_string(grab_pump_type_regexp.match(line).captures[0])
|
|
46
|
+
found_one_2x = true
|
|
47
|
+
break
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
if found_one_2x
|
|
52
|
+
fh.gets(thermo_newline) # nothing
|
|
53
|
+
table_headers = fh.gets(thermo_newline)
|
|
54
|
+
time_points = []
|
|
55
|
+
while (line = fh.gets(thermo_newline)) != thermo_newline
|
|
56
|
+
# 0 0.00 95.0 5.0 0.0 0.0 38.0 x
|
|
57
|
+
# 1 1.00 90.0 10.0 0.0 0.0 38.0 o
|
|
58
|
+
|
|
59
|
+
pieces = table_row_to_pieces(line, '2.0')
|
|
60
|
+
time_points << TimePoint.new(pieces[1].to_f, pieces[6].to_f, pieces[2,4].map{|x| x.to_f })
|
|
61
|
+
end
|
|
62
|
+
GradientProgram.new(pump_type, time_points, %w(A B C D))
|
|
63
|
+
elsif found_one_1x
|
|
64
|
+
fh.gets(thermo_newline) # nothing
|
|
65
|
+
table_headers = fh.gets(thermo_newline)
|
|
66
|
+
time_points = []
|
|
67
|
+
null_char_regexp = Regexp.new("^\000\000\000\000")
|
|
68
|
+
while (line = fh.gets(thermo_newline)) !~ null_char_regexp
|
|
69
|
+
pieces = table_row_to_pieces(line, '1.0')
|
|
70
|
+
time_points << TimePoint.new(pieces[1].to_f, pieces[6].to_f, pieces[2,4].map{|x| x.to_f })
|
|
71
|
+
end
|
|
72
|
+
GradientProgram.new(pump_type, time_points, %w(A B C D))
|
|
73
|
+
else
|
|
74
|
+
nil
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# returns the elements of a gradient table row properly cast
|
|
79
|
+
# NOTE: Xcal 2.X starts index with 0, 1.X starts with 1
|
|
80
|
+
# (and this is how it will be returned!)
|
|
81
|
+
# NOTE: Xcal 1.X will be shorter by one (doesn't have the o/x string!)
|
|
82
|
+
# [(Int) index, time (Float), %A (Float), %B (Float), %C (Float), %D (Float),
|
|
83
|
+
# FlowRate (Float), o/x (String)]
|
|
84
|
+
def self.table_row_to_pieces(line,xcal_version='2.0')
|
|
85
|
+
string = read_thermo_string(line)
|
|
86
|
+
if xcal_version >= '2.0'
|
|
87
|
+
# at first, I thought you could just split on spaces, but the table is
|
|
88
|
+
# designed to have a certain number of chars per column padded with
|
|
89
|
+
# spaces. This is the way to do it.
|
|
90
|
+
index = string[0,4].to_i
|
|
91
|
+
(tm, a, b, c, d) = (0...5).to_a.map do |x|
|
|
92
|
+
string[(x*6)+4,6].rstrip.to_f
|
|
93
|
+
end
|
|
94
|
+
fr = string[34,7].rstrip.to_f
|
|
95
|
+
ox = string[41,4].rstrip
|
|
96
|
+
[index, tm, a, b, c, d, fr, ox]
|
|
97
|
+
else
|
|
98
|
+
index = string[0,5].lstrip.to_i # correct
|
|
99
|
+
tm = string[5,13].lstrip.to_f # correct
|
|
100
|
+
#puts "**" + string[18,16] + "**"
|
|
101
|
+
fr = string[18,16].lstrip.to_f
|
|
102
|
+
(a,b,c,d) = (0..3).to_a.map do |x|
|
|
103
|
+
string[(x*8)+34, 8].lstrip.to_f # correct
|
|
104
|
+
end
|
|
105
|
+
[index, tm, a, b, c, d, fr]
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# takes a filehandle
|
|
110
|
+
# returns an array of gradient programs from a thermo filehandle.
|
|
111
|
+
# Acceptable file types include a .meth file and a .raw file
|
|
112
|
+
def self.all_from_handle(fh)
|
|
113
|
+
# 0005340: 3000 2e00 3000 3000 0a00 0a00 5300 6100 0...0.0.....S.a.
|
|
114
|
+
# 0005350: 6d00 7000 6c00 6500 2000 5000 7500 6d00 m.p.l.e. .P.u.m.
|
|
115
|
+
# 0005360: 7000 2000 6700 7200 6100 6400 6900 6500 p. .g.r.a.d.i.e.
|
|
116
|
+
# 0005370: 6e00 7400 2000 7400 6100 6200 6c00 6500 n.t. .t.a.b.l.e.
|
|
117
|
+
# 0005380: 3a00 0a00 0a00 4e00 6f00 2e00 2000 5400 :.....N.o... .T.
|
|
118
|
+
# 0005390: 6900 6d00 6500 2000 2000 4100 2500 2000 i.m.e. . .A.%. .
|
|
119
|
+
# 00053a0: 2000 2000 2000 4200 2500 2000 2000 2000 . . .B.%. . . .
|
|
120
|
+
# 00053b0: 2000 4300 2500 2000 2000 2000 2000 4400 .C.%. . . . .D.
|
|
121
|
+
# 00053c0: 2500 2000 2000 2000 2000 b500 6c00 2f00 %. . . . ...l./.
|
|
122
|
+
# 00053d0: 6d00 6900 6e00 2000 0a00 3000 2000 2000 m.i.n. ...0. . .
|
|
123
|
+
# 00053e0: 2000 3000 2e00 3000 3000 2000 2000 3000 .0...0.0. . .0.
|
|
124
|
+
# 00053f0: 2e00 3000 2000 2000 2000 3000 2e00 3000 ..0. . . .0...0.
|
|
125
|
+
# 0005400: 2000 2000 2000 3100 3000 3000 2e00 3000 . . .1.0.0...0.
|
|
126
|
+
programs = []
|
|
127
|
+
while (gp = get_gradient_program(fh))
|
|
128
|
+
programs << gp
|
|
129
|
+
end
|
|
130
|
+
programs
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def self.read_thermo_string(string)
|
|
134
|
+
chars = []
|
|
135
|
+
(0...string.size).step(2) do |i|
|
|
136
|
+
chars << string[i,1]
|
|
137
|
+
end
|
|
138
|
+
chars.join
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def self.read_thermo_string_as_hex(string)
|
|
142
|
+
chars = []
|
|
143
|
+
(0...string.size).step(4) do |i|
|
|
144
|
+
chars << string[i,2]
|
|
145
|
+
end
|
|
146
|
+
[chars.join].pack('H*')
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
class GradientProgram::TimePoint
|
|
153
|
+
# time in minutes
|
|
154
|
+
attr_accessor :time
|
|
155
|
+
# flow_rate in ul/min
|
|
156
|
+
attr_accessor :flow_rate
|
|
157
|
+
# percentages
|
|
158
|
+
attr_accessor :percentages
|
|
159
|
+
|
|
160
|
+
def initialize(time=nil, flow_rate=nil, percentages=[])
|
|
161
|
+
@time = time
|
|
162
|
+
@flow_rate = flow_rate
|
|
163
|
+
@percentages = percentages
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def ==(other)
|
|
167
|
+
self.class == other.class and @time==other.time and @flow_rate == other.flow_rate and @percentages == other.percentages
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
|
data/lib/ms/msrun.rb
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
|
|
2
|
+
require 'ms/scan'
|
|
3
|
+
require 'ms/parser'
|
|
4
|
+
require 'ms/msrun_index'
|
|
5
|
+
require 'ms/converter/mzxml'
|
|
6
|
+
|
|
7
|
+
#require 'ms/parser/mzxml'
|
|
8
|
+
#require 'ms/parser/mzdata'
|
|
9
|
+
|
|
10
|
+
module MS; end
|
|
11
|
+
class MS::MSRun
|
|
12
|
+
|
|
13
|
+
attr_accessor :start_time, :end_time
|
|
14
|
+
attr_accessor :scans
|
|
15
|
+
# (just for reference) the type of file this is (as symbol)
|
|
16
|
+
attr_accessor :filetype
|
|
17
|
+
# (just for reference) the version string of this type of file
|
|
18
|
+
attr_accessor :version
|
|
19
|
+
# the total number of scans
|
|
20
|
+
attr_writer :scan_count
|
|
21
|
+
|
|
22
|
+
# should be able to read basic information from a variety of files
|
|
23
|
+
# this will be written in regexp's because REXML is way too slow, xmlparser
|
|
24
|
+
# is not guaranteed to be on every system, xmlib is not on win32.
|
|
25
|
+
# if :spectra is false, then spectra are not parsed out and included
|
|
26
|
+
# OPTIONS:
|
|
27
|
+
# :spectra => *true|false # whether to parse out spectra
|
|
28
|
+
# [note: precursor intensities not guaranteed to exist unless :spectra == true]
|
|
29
|
+
def initialize(file=nil, opts={})
|
|
30
|
+
myopts = opts.dup ; myopts[:msrun] = self
|
|
31
|
+
if file
|
|
32
|
+
filetype_and_version = MS::Parser.filetype_and_version(file)
|
|
33
|
+
MS::Parser.new(filetype_and_version, :msrun).parse(file, myopts)
|
|
34
|
+
(@filetype, @version) = filetype_and_version
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# returns an array indexed by ms_level whose values are the number of scans at that level: [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2,
|
|
39
|
+
# ...
|
|
40
|
+
def scan_counts
|
|
41
|
+
ar = []
|
|
42
|
+
ar[0] = 0
|
|
43
|
+
scans.each do |sc|
|
|
44
|
+
level = sc.ms_level
|
|
45
|
+
unless ar[level]
|
|
46
|
+
ar[level] = 0
|
|
47
|
+
end
|
|
48
|
+
ar[level] += 1
|
|
49
|
+
ar[0] += 1
|
|
50
|
+
end
|
|
51
|
+
ar
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def scan_count(mslevel=0)
|
|
55
|
+
if mslevel == 0
|
|
56
|
+
@scan_count
|
|
57
|
+
else
|
|
58
|
+
num = 0
|
|
59
|
+
scans.each do |sc|
|
|
60
|
+
if sc.ms_level == mslevel
|
|
61
|
+
num += 1
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
num
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# for level 1, finds first scan and asks if it has start_mz/end_mz
|
|
69
|
+
# attributes. for other levels, asks for start_mz/ end_mz and takes the
|
|
70
|
+
# min/max. If start_mz and end_mz are not found, goes through every scan
|
|
71
|
+
# finding the max/min first and last m/z. returns [start_mz (rounded down to
|
|
72
|
+
# nearest int), end_mz (rounded up to nearest int)]
|
|
73
|
+
def start_and_end_mz(mslevel=1)
|
|
74
|
+
if mslevel == 1
|
|
75
|
+
# special case for mslevel 1 (where we expect scans to be same length)
|
|
76
|
+
scans.each do |sc|
|
|
77
|
+
if sc.ms_level == mslevel
|
|
78
|
+
if sc.start_mz && sc.end_mz
|
|
79
|
+
return [sc.start_mz, sc.end_mz]
|
|
80
|
+
end
|
|
81
|
+
break
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
hi_mz = nil
|
|
86
|
+
lo_mz = nil
|
|
87
|
+
# see if we have start_mz and end_mz for the level we want
|
|
88
|
+
# set the initial hi_mz and lo_mz in any case
|
|
89
|
+
have_start_end_mz = false
|
|
90
|
+
scans.each do |sc|
|
|
91
|
+
if sc.ms_level == mslevel
|
|
92
|
+
if sc.start_mz && sc.end_mz
|
|
93
|
+
lo_mz = sc.start_mz
|
|
94
|
+
hi_mz = sc.end_mz
|
|
95
|
+
else
|
|
96
|
+
mz = sc.spectrum.mz
|
|
97
|
+
hi_mz = mz.last
|
|
98
|
+
lo_mz = mz.first
|
|
99
|
+
end
|
|
100
|
+
break
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
if have_start_end_mz
|
|
104
|
+
scans.each do |sc|
|
|
105
|
+
if sc.ms_level == mslevel
|
|
106
|
+
if sc.start_mz < lo_mz
|
|
107
|
+
lo_mz = sc.start_mz
|
|
108
|
+
end
|
|
109
|
+
if sc.end_mz > hi_mz
|
|
110
|
+
hi_mz = sc.end_mz
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
else
|
|
115
|
+
# didn't have the attributes (find by brute force)
|
|
116
|
+
scans.each do |sc|
|
|
117
|
+
if sc.ms_level == mslevel
|
|
118
|
+
mz = sc.spectrum.mz
|
|
119
|
+
if mz.last > hi_mz
|
|
120
|
+
hi_mz = mz.last
|
|
121
|
+
end
|
|
122
|
+
if mz.last < lo_mz
|
|
123
|
+
lo_mz = mz.last
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
[lo_mz.floor, hi_mz.ceil]
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# returns an array of precursor mz by scan number
|
|
132
|
+
# returns only the m/z of the FIRST precursor if multiple
|
|
133
|
+
def precursor_mz_by_scan_num
|
|
134
|
+
ar = Array.new(@scans.size + 1)
|
|
135
|
+
@scans.each do |scan|
|
|
136
|
+
if prec = scan.precursors.first
|
|
137
|
+
ar[scan.num] = prec.mz
|
|
138
|
+
else
|
|
139
|
+
ar[scan.num] = nil
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
ar
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# returns an array of times and parallel array of spectra objects.
|
|
146
|
+
# ms_level = 0 then all spectra and times
|
|
147
|
+
# ms_level = 1 then all spectra of ms_level 1
|
|
148
|
+
def times_and_spectra(ms_level=0)
|
|
149
|
+
spectra = []
|
|
150
|
+
if ms_level == 0
|
|
151
|
+
times = @scans.map do |scan|
|
|
152
|
+
spectra << scan.spectrum
|
|
153
|
+
scan.time
|
|
154
|
+
end
|
|
155
|
+
[times, spectra]
|
|
156
|
+
else # choose a particular ms_level
|
|
157
|
+
times = []
|
|
158
|
+
@scans.each do |scan|
|
|
159
|
+
if ms_level == scan.ms_level
|
|
160
|
+
spectra << scan.spectrum
|
|
161
|
+
times << scan.time
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
[times, spectra]
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# same as the instance method (creates an object without spectrum and calls
|
|
169
|
+
# instance method of the same name)
|
|
170
|
+
def self.precursor_mz_by_scan_num(file)
|
|
171
|
+
self.new(file, :spectra => false).precursor_mz_by_scan_num
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
# only adds the parent if one is not already present!
|
|
175
|
+
def self.add_parent_scan(scans, add_intensities=false)
|
|
176
|
+
#start = Time.now
|
|
177
|
+
prev_scan = nil
|
|
178
|
+
parent_stack = [nil]
|
|
179
|
+
## we want to set the level to be the first mslevel we come to
|
|
180
|
+
prev_level = scans.first.ms_level
|
|
181
|
+
scans.each do |scan|
|
|
182
|
+
#next unless scan ## the first one is nil, (others?)
|
|
183
|
+
level = scan.ms_level
|
|
184
|
+
if prev_level < level
|
|
185
|
+
parent_stack.unshift prev_scan
|
|
186
|
+
end
|
|
187
|
+
if prev_level > level
|
|
188
|
+
(prev_level - level).times do parent_stack.shift end
|
|
189
|
+
end
|
|
190
|
+
if scan.ms_level > 1
|
|
191
|
+
scan.precursors.each do |precursor|
|
|
192
|
+
#precursor.parent = parent_stack.first # that's the next line's
|
|
193
|
+
precursor[2] = parent_stack.first unless precursor[2]
|
|
194
|
+
#precursor.intensity
|
|
195
|
+
if add_intensities
|
|
196
|
+
precursor[1] = precursor[2].spectrum.intensity_at_mz(precursor[0])
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
prev_level = level
|
|
201
|
+
prev_scan = scan
|
|
202
|
+
end
|
|
203
|
+
#puts "TOOK #{Time.now - start} secs"
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
+
require 'ms/scan'
|
|
2
|
+
require 'ms/parser'
|
|
1
3
|
|
|
2
|
-
|
|
3
|
-
require 'spec/mzxml/parser'
|
|
4
|
-
require 'spec/mzdata/parser'
|
|
5
|
-
|
|
6
|
-
module Spec; end
|
|
7
|
-
|
|
8
|
-
class Spec::MSRunIndex
|
|
4
|
+
class MS::MSRunIndex
|
|
9
5
|
# basename_noext is the base name of the file (with NO extensions)
|
|
10
6
|
attr_accessor :scans_by_num
|
|
11
7
|
attr_reader :basename_noext
|
|
@@ -32,7 +28,7 @@ class Spec::MSRunIndex
|
|
|
32
28
|
# index_file has one row for each scan:
|
|
33
29
|
# ms_level scan_num time [prec_mz prec_inten]
|
|
34
30
|
# also consider getting this data directly from the mzXML file
|
|
35
|
-
# via the
|
|
31
|
+
# via the MS::MzXML::Parser.get_msrun_index command
|
|
36
32
|
def set_from_index_file(index_file)
|
|
37
33
|
self.basename_noext = index_file
|
|
38
34
|
@scans_by_num = []
|
|
@@ -41,7 +37,7 @@ class Spec::MSRunIndex
|
|
|
41
37
|
next if line !~ /\d/ || line =~ /^#/
|
|
42
38
|
line.chomp!
|
|
43
39
|
arr = line.split(" ")
|
|
44
|
-
scan =
|
|
40
|
+
scan = MS::Scan.new(arr[1].to_i, arr[0].to_i, arr[2].to_f)
|
|
45
41
|
if scan.ms_level > 1
|
|
46
42
|
scan.prec_mz = arr[3].to_f
|
|
47
43
|
scan.prec_inten = arr[4].to_f
|
|
@@ -49,7 +45,7 @@ class Spec::MSRunIndex
|
|
|
49
45
|
@scans_by_num[scan.num] = scan
|
|
50
46
|
end
|
|
51
47
|
end
|
|
52
|
-
|
|
48
|
+
MS::Scan.add_parent_scan(@scans_by_num)
|
|
53
49
|
end
|
|
54
50
|
|
|
55
51
|
# Takes a .mzXML file or .timeIndex file (currently)
|
|
@@ -73,7 +69,7 @@ class Spec::MSRunIndex
|
|
|
73
69
|
# returns a new
|
|
74
70
|
def set_from_mzxml(file)
|
|
75
71
|
self.basename_noext = file
|
|
76
|
-
@scans_by_num =
|
|
72
|
+
@scans_by_num = MS::Parser.new(file, :scans_by_num).parse(file)
|
|
77
73
|
end
|
|
78
74
|
|
|
79
75
|
# writes the index to filename
|
|
@@ -109,33 +105,4 @@ class Spec::MSRunIndex
|
|
|
109
105
|
end
|
|
110
106
|
|
|
111
107
|
|
|
112
|
-
class Spec::MSRun
|
|
113
|
-
|
|
114
|
-
# scan_count is an array [0] is all the scans, [1] is mslevel 1, [2] is mslevel 2, etc
|
|
115
|
-
attr_accessor :scan_count, :start_time, :end_time, :start_mz, :end_mz
|
|
116
|
-
|
|
117
|
-
# returns an array indexed by scan number where the precursor mz is recorded
|
|
118
|
-
# for each fragment (ms2) ion
|
|
119
|
-
# The precursor mz will be a String!
|
|
120
|
-
def self.precursor_mz_by_scan(file)
|
|
121
|
-
extname = File.extname(file)
|
|
122
|
-
case extname
|
|
123
|
-
when '.mzXML' || '.timeIndex'
|
|
124
|
-
klass = Spec::MzXML::Parser
|
|
125
|
-
when '.xml'
|
|
126
|
-
klass = Spec::MzData::Parser
|
|
127
|
-
when '' # they want us to figure out the right extension
|
|
128
|
-
if File.exist? file + '.xml'
|
|
129
|
-
klass = Spec::MzData::Parser
|
|
130
|
-
else
|
|
131
|
-
# This will cover .timeIndex, .mzXML and .RAW
|
|
132
|
-
klass = Spec::MzXML::Parser
|
|
133
|
-
end
|
|
134
|
-
else
|
|
135
|
-
abort "files of extension #{extname} are not currently supported"
|
|
136
|
-
end
|
|
137
|
-
klass.new.precursor_mz_by_scan(file)
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
end
|
|
141
108
|
|