mspire 0.3.9 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +24 -7
- data/README +15 -13
- data/README.rdoc +18 -0
- data/Rakefile +50 -14
- data/bin/aafreqs.rb +0 -0
- data/bin/bioworks2excel.rb +0 -0
- data/bin/bioworks_to_pepxml.rb +2 -1
- data/bin/bioworks_to_pepxml_gui.rb +0 -0
- data/bin/fasta_shaker.rb +0 -0
- data/bin/filter_and_validate.rb +0 -0
- data/bin/gi2annot.rb +0 -0
- data/bin/id_class_anal.rb +0 -0
- data/bin/id_precision.rb +0 -0
- data/bin/ms_to_lmat.rb +0 -0
- data/bin/pepproph_filter.rb +0 -0
- data/bin/protein_summary.rb +0 -0
- data/bin/protxml2prots_peps.rb +0 -0
- data/bin/raw_to_mzXML.rb +3 -3
- data/bin/run_percolator.rb +122 -0
- data/bin/sqt_group.rb +0 -0
- data/bin/srf_group.rb +0 -0
- data/changelog.txt +29 -0
- data/lib/ms/gradient_program.rb +0 -1
- data/lib/ms/msrun.rb +62 -29
- data/lib/ms/parser/mzdata/axml.rb +55 -0
- data/lib/ms/parser/mzdata/dom.rb +51 -36
- data/lib/ms/parser/mzdata.rb +8 -2
- data/lib/ms/parser/mzxml/axml.rb +59 -0
- data/lib/ms/parser/mzxml/dom.rb +80 -57
- data/lib/ms/parser/mzxml/hpricot.rb +1 -1
- data/lib/ms/parser/mzxml/libxml.rb +6 -2
- data/lib/ms/parser/mzxml.rb +110 -3
- data/lib/ms/parser.rb +4 -4
- data/lib/ms/precursor.rb +19 -4
- data/lib/ms/scan.rb +7 -7
- data/lib/ms/spectrum.rb +249 -58
- data/lib/mspire.rb +1 -1
- data/lib/spec_id/bioworks.rb +2 -2
- data/lib/spec_id/precision/filter/cmdline.rb +8 -1
- data/lib/spec_id/precision/prob/cmdline.rb +2 -2
- data/lib/spec_id/precision/prob.rb +1 -0
- data/lib/spec_id/proph/pep_summary.rb +3 -4
- data/lib/spec_id/proph/prot_summary.rb +3 -3
- data/lib/spec_id/protein_summary.rb +1 -1
- data/lib/spec_id/sequest/pepxml.rb +5 -5
- data/lib/spec_id/sqt.rb +4 -4
- data/lib/spec_id/srf.rb +49 -8
- data/lib/spec_id.rb +5 -0
- data/lib/xml_style_parser.rb +16 -2
- data/script/compile_and_plot_smriti_final.rb +0 -0
- data/script/create_little_pepxml.rb +0 -0
- data/script/degenerate_peptides.rb +0 -0
- data/script/estimate_fpr_by_cysteine.rb +0 -0
- data/script/extract_gradient_programs.rb +1 -1
- data/script/find_cysteine_background.rb +0 -0
- data/script/genuine_tps_and_probs.rb +0 -0
- data/script/get_apex_values_rexml.rb +0 -0
- data/script/mascot_fix_pepxml.rb +123 -0
- data/script/msvis.rb +0 -0
- data/script/mzXML2timeIndex.rb +0 -0
- data/script/peps_per_bin.rb +0 -0
- data/script/prep_dir.rb +0 -0
- data/script/simple_protein_digestion.rb +0 -0
- data/script/smriti_final_analysis.rb +0 -0
- data/script/sqt_to_meta.rb +0 -0
- data/script/top_hit_per_scan.rb +0 -0
- data/script/toppred_to_yaml.rb +0 -0
- data/script/tpp_installer.rb +0 -0
- data/specs/bin/prob_validate_spec.rb +5 -2
- data/specs/bin/protein_summary_spec.rb +5 -1
- data/specs/ms/msrun_spec.rb +176 -133
- data/specs/ms/parser_spec.rb +3 -3
- data/specs/ms/spectrum_spec.rb +0 -2
- data/specs/spec_id/precision/filter_spec.rb +4 -1
- data/specs/spec_id/precision/prob_spec.rb +2 -2
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
- data/specs/spec_id/sqt_spec.rb +5 -5
- data/specs/spec_id/srf_spec.rb +56 -93
- data/specs/spec_id/srf_spec_helper.rb +121 -284
- data/specs/spec_id_spec.rb +3 -0
- data/specs/transmem/toppred_spec.rb +1 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
- data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
- metadata +247 -229
data/lib/ms/parser.rb
CHANGED
|
@@ -86,23 +86,23 @@ module MS::Parser
|
|
|
86
86
|
|
|
87
87
|
# filetype_version is an example file to parse, or it is an array: [type, version].
|
|
88
88
|
# parse_type is the information to be gleaned (as symbol).
|
|
89
|
-
def self.new(filetype_version, parse_type)
|
|
89
|
+
# Creates a parser for the requested kind of information.
#
# filetype_version:: an example file (resolved through filetype_and_version)
#                    or an already known [type, version] array
# parse_type::       the information to be gleaned (as symbol)
# opts::             handed on unchanged to require_and_create_parser
def self.new(filetype_version, parse_type, opts={})
  type_and_version =
    if filetype_version.is_a?(Array)
      filetype_version
    else
      filetype_and_version(filetype_version)
    end
  require_and_create_parser(type_and_version, parse_type, opts)
end
|
|
95
95
|
|
|
96
96
|
private
|
|
97
97
|
|
|
98
98
|
# returns a working parser.
|
|
99
|
-
def self.require_and_create_parser(filetype_version, parse_type)
|
|
99
|
+
# Requires the implementation registered for the file type and returns a
# new parser instance.
#
# filetype_version:: [filetype, version] pair
# parse_type::       passed as first argument to the parser's constructor
# opts::             passed as last argument to the parser's constructor
def self.require_and_create_parser(filetype_version, parse_type, opts)
  (filetype, version) = filetype_version
  # the file named in @@filetypes_to_require is loaded here, right before
  # the class it defines is looked up
  require @@filetypes_to_require[filetype]
  parser_class = MS::Parser.const_get(@@filetypes_to_upcase[filetype])
  parser_class.new(parse_type, version, opts)
end
|
|
107
107
|
|
|
108
108
|
end
|
data/lib/ms/precursor.rb
CHANGED
|
@@ -1,10 +1,25 @@
|
|
|
1
|
-
require '
|
|
1
|
+
require 'arrayclass'
|
|
2
2
|
|
|
3
3
|
module MS; end
|
|
4
4
|
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
|
|
5
|
+
# charge_states are the possible charge states of the precursor
|
|
6
|
+
# parent references a scan
|
|
7
|
+
# 0 1 2 3
|
|
8
|
+
MS::Precursor = Arrayclass.new(%w(mz intensity parent charge_states))
|
|
8
9
|
|
|
9
10
|
class MS::Precursor

  # the generated reader is replaced by the lazy version below
  undef :intensity

  # Returns the precursor intensity (slot 1).  When it has not been set yet
  # it is looked up once in the spectrum of the parent scan (slot 2) at the
  # precursor m/z (slot 0) and stored for later calls.
  # Returns nil when there is no parent scan or the parent has no spectrum:
  # if we didn't read in the spectra, we can't get this value!
  def intensity
    if self[1].nil?
      parent_scan = self[2]
      parent_spectrum = parent_scan && parent_scan.spectrum
      if parent_spectrum
        self[1] = parent_spectrum.intensity_at_mz(self[0])
      end
    end
    self[1]
  end

end
|
data/lib/ms/scan.rb
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
require '
|
|
1
|
+
require 'arrayclass'
|
|
2
2
|
require 'ms/precursor'
|
|
3
3
|
|
|
4
4
|
module MS ; end
|
|
5
5
|
|
|
6
6
|
# 0 1 2 3 4 5 6
|
|
7
|
-
MS::Scan =
|
|
7
|
+
MS::Scan = Arrayclass.new( %w(num ms_level time start_mz end_mz precursor spectrum) )
|
|
8
8
|
|
|
9
9
|
# time in seconds
|
|
10
|
-
# everything else in float/int
|
|
10
|
+
# everything else in float/int
|
|
11
11
|
|
|
12
12
|
class MS::Scan
|
|
13
13
|
#@@order = %w(num ms_level time start_mz end_mz prec_mz prec_inten parent spectrum)
|
|
@@ -36,18 +36,18 @@ class MS::Scan
|
|
|
36
36
|
display.compact!
|
|
37
37
|
spec_display =
|
|
38
38
|
if spectrum
|
|
39
|
-
spectrum.
|
|
39
|
+
spectrum.mzs.size
|
|
40
40
|
else
|
|
41
41
|
'nil'
|
|
42
42
|
end
|
|
43
|
-
"<MS::Scan:#{__id__} " + display.join(", ") + "
|
|
43
|
+
"<MS::Scan:#{__id__} " + display.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
# returns the string (space delimited): "ms_level num time [prec_mz [prec_inten]]"
# The precursor m/z is appended only when the scan has a precursor, and the
# precursor intensity only when the precursor can supply one.
def to_index_file_string
  arr = [ms_level, num, time]
  # precursor may be nil; it must not be asked for its intensity in that case
  if (prec = precursor)
    arr << prec.mz
    if (x = prec.intensity) then arr << x end
  end
  arr.join(" ")
end
|
|
53
53
|
|
data/lib/ms/spectrum.rb
CHANGED
|
@@ -10,27 +10,16 @@ class MS::Spectrum
|
|
|
10
10
|
Unpack_little_endian_double = 'E*'
|
|
11
11
|
|
|
12
12
|
# m/z's
|
|
13
|
-
attr_accessor :
|
|
13
|
+
attr_accessor :mzs
|
|
14
14
|
# intensities
|
|
15
|
-
attr_accessor :
|
|
15
|
+
attr_accessor :intensities
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def has_mz_data?
|
|
24
|
-
(@mz.size > 0) && (@mz.first.is_a?(Numeric))
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def has_intensity_data?
|
|
28
|
-
(@intensity.size > 0) && (@intensity.first.is_a?(Numeric))
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# takes a base64 string and returns an array
|
|
32
|
-
def self.base64_to_array(string, precision=32, network_order=true)
|
|
33
|
-
b64d = Base64.decode64(string)
|
|
17
|
+
#######################
|
|
18
|
+
## CLASS METHODS:
|
|
19
|
+
#######################
|
|
20
|
+
|
|
21
|
+
# an already decoded string (ready to be unpacked as floating point numbers)
|
|
22
|
+
def self.string_to_array(string, precision=32, network_order=true)
|
|
34
23
|
unpack_code =
|
|
35
24
|
if network_order
|
|
36
25
|
if precision == 32
|
|
@@ -45,57 +34,57 @@ class MS::Spectrum
|
|
|
45
34
|
Unpack_little_endian_double
|
|
46
35
|
end
|
|
47
36
|
end
|
|
48
|
-
|
|
37
|
+
string.unpack(unpack_code)
|
|
49
38
|
end
|
|
50
39
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
self.new(mz, inten)
|
|
40
|
+
# Decodes a base64 string and unpacks the result into an array of numbers
# (see string_to_array for the meaning of precision and network_order).
def self.base64_to_array(b64_string, precision=32, network_order=true)
  decoded = Base64.decode64(b64_string)
  self.string_to_array(decoded, precision, network_order)
end
|
|
56
44
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
data = base64_to_array(string, precision, network_order)
|
|
45
|
+
|
|
46
|
+
# Decodes a base64 peaks string whose values alternate m/z, intensity,
# m/z, ... and returns the two-element array [mz_ar, intensity_ar].
# If the decoded data holds an odd number of values, the trailing value ends
# up in mz_ar without a partner in intensity_ar.
def self.mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
  data = base64_to_array(b64_string, precision, network_order)
  mz_ar = []
  intensity_ar = []
  data.each_with_index do |dat, ind|
    if (ind % 2) == 0 # even positions hold the m/z values
      mz_ar << dat
    else
      intensity_ar << dat
    end
  end
  [mz_ar, intensity_ar]
end
|
|
76
63
|
|
|
64
|
+
# takes a base64 peaks string (m/z and intensity values in one string) and
# returns a new spectrum object created from the two decoded arrays
def self.from_base64_peaks(b64_string, precision=32, network_order=true)
  decoded_mzs, decoded_intensities =
    self.mzs_and_intensities_from_base64_peaks(b64_string, precision, network_order)
  self.new(decoded_mzs, decoded_intensities)
end
|
|
77
70
|
|
|
71
|
+
# Creates a new spectrum from two separate base64 strings, one holding the
# m/z values and one holding the intensities.  Each string is decoded with
# its own precision and byte order settings.
def self.from_base64_pair(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
  self.new(
    base64_to_array(mz_string, mz_precision, mz_network_order),
    base64_to_array(intensity_string, intensity_precision, intensity_network_order)
  )
end
|
|
78
76
|
|
|
77
|
+
# Stores the given m/z array and intensity array (both default to a new
# empty array).
def initialize(mz_ar=[], intensity_ar=[])
  @mzs, @intensities = mz_ar, intensity_ar
end
|
|
79
81
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
82
|
+
# true when @mzs is set, is not empty and starts with a Numeric; otherwise
# false (or nil when @mzs is nil)
def has_mz_data?
  return @mzs unless @mzs
  return false unless @mzs.size > 0
  @mzs.first.is_a?(Numeric)
end
|
|
83
85
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def base64_peaks_to_pairs(string, precision=32)
|
|
87
|
-
data = base64_peaks_to_array(string, precision)
|
|
88
|
-
ndata = []
|
|
89
|
-
data.each_with_index do |dat,ind|
|
|
90
|
-
if (ind % 2) == 0 # even
|
|
91
|
-
arr = Array.new(2)
|
|
92
|
-
arr[0] = dat
|
|
93
|
-
ndata.push( arr )
|
|
94
|
-
else
|
|
95
|
-
ndata.last[1] = dat
|
|
96
|
-
end
|
|
97
|
-
end
|
|
98
|
-
ndata
|
|
86
|
+
# true when @intensities is set, is not empty and starts with a Numeric;
# otherwise false (or nil when @intensities is nil)
def has_intensity_data?
  return @intensities unless @intensities
  return false unless @intensities.size > 0
  @intensities.first.is_a?(Numeric)
end
|
|
100
89
|
|
|
101
90
|
# returns the index of the first value matching that m/z. the argument m/z
|
|
@@ -103,17 +92,18 @@ class MS::Spectrum
|
|
|
103
92
|
# given) but must be at least integer precision (after rounding)
|
|
104
93
|
# implemented as binary search (bsearch from the web)
|
|
105
94
|
def index(mz)
|
|
95
|
+
mz_ar = mzs
|
|
106
96
|
return_val = nil
|
|
107
|
-
ind =
|
|
108
|
-
if
|
|
97
|
+
ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
|
|
98
|
+
if mz_ar[ind] == mz
|
|
109
99
|
return_val = ind
|
|
110
100
|
else
|
|
111
101
|
# do a rounding game to see which one is it, or nil
|
|
112
102
|
# find all the values rounding to the same integer in the locale
|
|
113
103
|
# test each one fully in turn
|
|
114
104
|
mz = mz.to_f
|
|
115
|
-
mz_size =
|
|
116
|
-
if ((ind < mz_size) and equal_after_rounding?(
|
|
105
|
+
mz_size = mz_ar.size
|
|
106
|
+
if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
|
|
117
107
|
return_val = ind
|
|
118
108
|
else # run the loop
|
|
119
109
|
up = ind
|
|
@@ -122,7 +112,7 @@ class MS::Spectrum
|
|
|
122
112
|
if up >= mz_size
|
|
123
113
|
break
|
|
124
114
|
end
|
|
125
|
-
mz_up =
|
|
115
|
+
mz_up = mz_ar[up]
|
|
126
116
|
if (mz_up.ceil - mz.ceil >= 2)
|
|
127
117
|
break
|
|
128
118
|
else
|
|
@@ -138,7 +128,7 @@ class MS::Spectrum
|
|
|
138
128
|
if dn < 0
|
|
139
129
|
break
|
|
140
130
|
end
|
|
141
|
-
mz_dn =
|
|
131
|
+
mz_dn = mz_ar[dn]
|
|
142
132
|
if (mz.floor - mz_dn.floor >= 2)
|
|
143
133
|
break
|
|
144
134
|
else
|
|
@@ -156,7 +146,7 @@ class MS::Spectrum
|
|
|
156
146
|
# uses index function and returns the intensity at that value
|
|
157
147
|
def intensity_at_mz(mz)
|
|
158
148
|
if x = index(mz)
|
|
159
|
-
|
|
149
|
+
intensities[x]
|
|
160
150
|
else
|
|
161
151
|
nil
|
|
162
152
|
end
|
|
@@ -189,5 +179,206 @@ class MS::Spectrum
|
|
|
189
179
|
neg_exp10
|
|
190
180
|
end
|
|
191
181
|
|
|
182
|
+
######
|
|
183
|
+
# NOT REALLY USING RIGHT NOW:
|
|
184
|
+
######
|
|
185
|
+
|
|
186
|
+
# takes a base64 peaks string and returns an array of [m/z,intensity] doublets
# mzXML as network ordered
# A trailing m/z without an intensity yields a doublet whose second entry is nil.
def base64_peaks_to_pairs(string, precision=32)
  pairs = []
  base64_peaks_to_array(string, precision).each_with_index do |value, position|
    if (position % 2) == 1
      # odd position: the intensity completes the doublet started before
      pairs.last[1] = value
    else
      pairs << [value, nil]
    end
  end
  pairs
end
|
|
202
|
+
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# This implements a spectrum that stores itself as string data and only
# evaluates the information when it is called
class MS::Spectrum::LazyString < MS::Spectrum

  # the arrays are derived from the stored strings, so they cannot be assigned
  undef mzs=
  undef intensities=

  # beware that this converts the information in @mz_string every time it is
  # called
  def mzs
    MS::Spectrum.string_to_array(@mz_string, @mz_precision, @mz_network_order)
  end

  # beware that this converts the information in @intensity_string every time
  # it is called
  def intensities
    MS::Spectrum.string_to_array(@intensity_string, @intensity_precision, @intensity_network_order)
  end

  # this takes decoded base64 strings (one for the m/z values, one for the
  # intensities) that are then interpreted when information is accessed
  def initialize(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
    @mz_string = mz_string
    @mz_precision = mz_precision
    @mz_network_order = mz_network_order
    @intensity_string = intensity_string
    @intensity_precision = intensity_precision
    @intensity_network_order = intensity_network_order
  end

  # from mzXML files where information is held in peaks (m/z, intensity,
  # m/z...)
  # The decoded string is cut into pieces of (precision / 8) bytes; the
  # pieces at even positions form the m/z string and those at odd positions
  # the intensity string.
  def self.from_base64_peaks(b64_string, precision=32, network_order=true)
    string = Base64.decode64(b64_string)
    bytes_per_number = precision / 8
    mz_pieces = []
    intensity_pieces = []
    index = 0
    (0...string.size).step(bytes_per_number) do |i|
      piece = string[i, bytes_per_number]
      if index % 2 == 0
        mz_pieces << piece
      else
        intensity_pieces << piece
      end
      index += 1
    end
    self.new(mz_pieces.join, precision, network_order, intensity_pieces.join, precision, network_order)
  end

  # from mzML and mzData style files where mz and intensity information are
  # kept in different strings.
  def self.from_base64_pair(b64_mz_string, mz_precision, mz_network_order, b64_intensity_string, intensity_precision, intensity_network_order)
    self.new(Base64.decode64(b64_mz_string), mz_precision, mz_network_order, Base64.decode64(b64_intensity_string), intensity_precision, intensity_network_order)
  end

  # only checks that everything needed for decoding is present; the string
  # itself is not unpacked
  def has_mz_data?
    @mz_string.is_a?(String) && @mz_precision && !@mz_network_order.nil?
  end

  # only checks that everything needed for decoding is present; the string
  # itself is not unpacked
  def has_intensity_data?
    @intensity_string.is_a?(String) && @intensity_precision && !@intensity_network_order.nil?
  end

end
|
|
273
|
+
|
|
274
|
+
module MS::Spectrum::LazyIO
  # Creates the lazy spectrum object matching the number of arguments:
  #   5 args -> MS::Spectrum::LazyIO::Peaks (mzXML)
  #   9 args -> MS::Spectrum::LazyIO::Pair  (other)
  # Raises RuntimeError for any other number of arguments.
  def self.new(*args)
    if args.size == 5 # mzXMl
      MS::Spectrum::LazyIO::Peaks.new(*args)
    elsif args.size == 9 # other
      MS::Spectrum::LazyIO::Pair.new(*args)
    else
      raise RuntimeError, "must give 5 or 9 args for peak data and pair data respectively"
    end
  end
end
|
|
193
285
|
|
|
286
|
+
|
|
287
|
+
# stores an io object and the start and end indices and only evaluates the
# spectrum when information is requested
class MS::Spectrum::LazyIO::Pair < MS::Spectrum
  include MS::Spectrum::LazyIO

  undef mzs=
  undef intensities=

  # io is the object the base64 data is read from; for the m/z data and the
  # intensity data each, the position to read from, the number of bytes to
  # read and the precision and byte order used for decoding are stored.
  def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
    @io = io

    @mz_start_index, @mz_num_bytes = mz_start_index, mz_num_bytes
    @mz_precision, @mz_network_order = mz_precision, mz_network_order

    @intensity_start_index, @intensity_num_bytes = intensity_start_index, intensity_num_bytes
    @intensity_precision, @intensity_network_order = intensity_precision, intensity_network_order
  end

  # beware that this reads from @io and converts the information every time
  # it is called.
  def mzs
    @io.pos = @mz_start_index
    MS::Spectrum.base64_to_array(@io.read(@mz_num_bytes), @mz_precision, @mz_network_order)
  end

  # beware that this reads from @io and converts the information every time
  # it is called.
  def intensities
    @io.pos = @intensity_start_index
    MS::Spectrum.base64_to_array(@io.read(@intensity_num_bytes), @intensity_precision, @intensity_network_order)
  end

  # false once @io is closed; otherwise tells whether everything needed to
  # read and decode the m/z data is present
  def has_mz_data?
    return false if @io.closed?
    @mz_start_index && @mz_num_bytes && @mz_precision && !@mz_network_order.nil?
  end

  # false once @io is closed; otherwise tells whether everything needed to
  # read and decode the intensity data is present
  def has_intensity_data?
    return false if @io.closed?
    @intensity_start_index && @intensity_num_bytes && @intensity_precision && !@intensity_network_order.nil?
  end

end
|
|
334
|
+
|
|
335
|
+
class MS::Spectrum::LazyIO::Peaks < MS::Spectrum
  include MS::Spectrum::LazyIO

  undef mzs=
  undef intensities=

  # io is the object the base64 peaks data is read from, start_index the
  # position to read from and num_bytes the number of bytes to read;
  # precision and network_order are used for decoding.
  def initialize(io, start_index, num_bytes, precision, network_order)
    @io = io
    @start_index, @num_bytes = start_index, num_bytes
    @precision, @network_order = precision, network_order
  end

  # returns two arrays: an array of m/z values and an array of intensity
  # values. This is the preferred way to access mzXML file information under
  # lazy evaluation
  def mzs_and_intensities
    @io.pos = @start_index
    MS::Spectrum.mzs_and_intensities_from_base64_peaks(@io.read(@num_bytes), @precision, @network_order)
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  def mzs
    # TODO: this can be made slightly faster
    both = mzs_and_intensities
    both.first
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call
  # 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  def intensities
    # TODO: this can be made slightly faster
    both = mzs_and_intensities
    both.last
  end

  # false once @io is closed; otherwise tells whether everything needed to
  # read and decode the peaks is present
  def has_mz_data?
    return false if @io.closed?
    @start_index && @num_bytes && @precision && !@network_order.nil?
  end

  # m/z and intensity values come from the same region, so the same checks
  # as in has_mz_data? apply
  def has_intensity_data?
    return false if @io.closed?
    @start_index && @num_bytes && @precision && !@network_order.nil?
  end

end
|
data/lib/mspire.rb
CHANGED
data/lib/spec_id/bioworks.rb
CHANGED
|
@@ -5,7 +5,7 @@ require 'xmlparser'
|
|
|
5
5
|
require 'spec_id'
|
|
6
6
|
require 'zlib'
|
|
7
7
|
require 'hash_by'
|
|
8
|
-
require '
|
|
8
|
+
require 'arrayclass'
|
|
9
9
|
require 'fasta'
|
|
10
10
|
|
|
11
11
|
## have to pre-declare some guys
|
|
@@ -377,7 +377,7 @@ class Bioworks::Prot
|
|
|
377
377
|
end
|
|
378
378
|
end
|
|
379
379
|
|
|
380
|
-
Bioworks::Pep =
|
|
380
|
+
Bioworks::Pep = Arrayclass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
|
|
381
381
|
# 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
|
|
382
382
|
|
|
383
383
|
class Bioworks::Pep
|
|
@@ -145,7 +145,14 @@ module SpecID
|
|
|
145
145
|
|
|
146
146
|
op.separator ""
|
|
147
147
|
op.separator "OTHER OPTIONS: "
|
|
148
|
-
op.opt(:interactive)
|
|
148
|
+
op.opt(:interactive) do |v|
|
|
149
|
+
opts[:interactive] =
|
|
150
|
+
if v
|
|
151
|
+
v
|
|
152
|
+
else
|
|
153
|
+
true
|
|
154
|
+
end
|
|
155
|
+
end
|
|
149
156
|
op.opt(:interactive_verbose) {|v| opts[:interactive_verbose] = v }
|
|
150
157
|
|
|
151
158
|
op.opt(:top_hit_by) {|v| opts[:top_hit_by] = v.to_sym}
|
|
@@ -123,10 +123,10 @@ module SpecID
|
|
|
123
123
|
end
|
|
124
124
|
postfilter =
|
|
125
125
|
if spec_id_obj.class == SQTGroup or spec_id_obj.class == Proph::PepSummary
|
|
126
|
-
puts 'making background estimates with: top_per_scan'
|
|
126
|
+
#puts 'making background estimates with: top_per_scan'
|
|
127
127
|
:top_per_scan
|
|
128
128
|
else
|
|
129
|
-
puts 'making background estimates with: top_per_aaseq_charge'
|
|
129
|
+
#puts 'making background estimates with: top_per_aaseq_charge'
|
|
130
130
|
:top_per_aaseq_charge
|
|
131
131
|
end
|
|
132
132
|
opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], postfilter, spec_id_obj)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
require '
|
|
2
|
+
require 'arrayclass'
|
|
3
3
|
require 'spec_id/sequest/pepxml'
|
|
4
4
|
require 'spec_id/parser/proph'
|
|
5
5
|
|
|
@@ -61,11 +61,10 @@ module Proph
|
|
|
61
61
|
|
|
62
62
|
# this is a SpecID::Pep (by interface: not including stuff yet)
|
|
63
63
|
class PepSummary::Pep < Sequest::PepXML::SearchHit
|
|
64
|
-
|
|
65
64
|
# aaseq is defined in SearchHit
|
|
66
65
|
|
|
67
66
|
%w(probability fval ntt nmc massd prots).each do |guy|
|
|
68
|
-
self.add_member(guy)
|
|
67
|
+
self.add_member(guy)
|
|
69
68
|
end
|
|
70
69
|
|
|
71
70
|
# returns self
|
|
@@ -91,7 +90,7 @@ module Proph
|
|
|
91
90
|
end
|
|
92
91
|
end
|
|
93
92
|
|
|
94
|
-
::Proph::PepSummary::Prot =
|
|
93
|
+
::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))
|
|
95
94
|
|
|
96
95
|
class PepSummary::Prot
|
|
97
96
|
def first_entry ; self[0] end ## name
|
|
@@ -3,7 +3,7 @@ require 'hash_by'
|
|
|
3
3
|
require 'instance_var_set_from_hash'
|
|
4
4
|
require 'axml'
|
|
5
5
|
require 'spec_id'
|
|
6
|
-
require '
|
|
6
|
+
require 'arrayclass'
|
|
7
7
|
|
|
8
8
|
require 'spec_id/parser/proph'
|
|
9
9
|
|
|
@@ -122,7 +122,7 @@ end # Proph
|
|
|
122
122
|
|
|
123
123
|
|
|
124
124
|
|
|
125
|
-
Proph::Prot =
|
|
125
|
+
Proph::Prot = Arrayclass.new(%w(protein_name probability n_indistinguishable_proteins percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids description peps))
|
|
126
126
|
|
|
127
127
|
# note that 'description' is found in the element 'annotation', attribute 'protein_description'
|
|
128
128
|
# NOTE!: unique_stripped peptides is an array rather than + joined string
|
|
@@ -142,7 +142,7 @@ end
|
|
|
142
142
|
|
|
143
143
|
# this is a pep from a -prot.xml file
|
|
144
144
|
|
|
145
|
-
Proph::Prot::Pep =
|
|
145
|
+
Proph::Prot::Pep = Arrayclass.new(%w(peptide_sequence charge initial_probability nsp_adjusted_probability weight is_nondegenerate_evidence n_enzymatic_termini n_sibling_peptides n_sibling_peptides_bin n_instances is_contributing_evidence calc_neutral_pep_mass modification_info prots))
|
|
146
146
|
|
|
147
147
|
class Proph::Prot::Pep
|
|
148
148
|
include SpecID::Pep
|
|
@@ -428,7 +428,7 @@ class ProteinSummary
|
|
|
428
428
|
op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
|
|
429
429
|
op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
|
|
430
430
|
op.separator ""
|
|
431
|
-
op.separator "
|
|
431
|
+
op.separator "specific to ProteinProphet (with no concatenated DB):"
|
|
432
432
|
op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
|
|
433
433
|
op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
|
|
434
434
|
op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
|
|
@@ -1206,7 +1206,7 @@ class Sequest::PepXML::SearchDatabase
|
|
|
1206
1206
|
|
|
1207
1207
|
end
|
|
1208
1208
|
|
|
1209
|
-
Sequest::PepXML::SpectrumQuery =
|
|
1209
|
+
Sequest::PepXML::SpectrumQuery = Arrayclass.new(%w(spectrum start_scan end_scan precursor_neutral_mass index assumed_charge search_results pepxml_version))
|
|
1210
1210
|
|
|
1211
1211
|
class Sequest::PepXML::SpectrumQuery
|
|
1212
1212
|
include SpecIDXML
|
|
@@ -1292,7 +1292,7 @@ class Sequest::PepXML::SpectrumQuery
|
|
|
1292
1292
|
end
|
|
1293
1293
|
|
|
1294
1294
|
|
|
1295
|
-
Sequest::PepXML::SearchHit =
|
|
1295
|
+
Sequest::PepXML::SearchHit = Arrayclass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info spectrum_query) )
|
|
1296
1296
|
|
|
1297
1297
|
# 0=hit_rank 1=peptide 2=peptide_prev_aa 3=peptide_next_aa 4=protein 5=num_tot_proteins 6=num_matched_ions 7=tot_num_ions 8=calc_neutral_pep_mass 9=massdiff 10=num_tol_term 11=num_missed_cleavages 12=is_rejected 13=deltacnstar 14=xcorr 15=deltacn 16=spscore 17=sprank 18=modification_info 19=spectrum_query
|
|
1298
1298
|
|
|
@@ -1312,7 +1312,7 @@ class Sequest::PepXML::SearchHit
|
|
|
1312
1312
|
tmp_verb = $VERBOSE
|
|
1313
1313
|
$VERBOSE = nil
|
|
1314
1314
|
def initialize(hash=nil)
|
|
1315
|
-
super(
|
|
1315
|
+
super(self.class.size)
|
|
1316
1316
|
if hash
|
|
1317
1317
|
self[0,20] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info], hash[:spectrum_query]]
|
|
1318
1318
|
end
|
|
@@ -1379,7 +1379,7 @@ class Sequest::PepXML::SearchHit
|
|
|
1379
1379
|
end
|
|
1380
1380
|
|
|
1381
1381
|
|
|
1382
|
-
Sequest::PepXML::SearchHit::ModificationInfo =
|
|
1382
|
+
Sequest::PepXML::SearchHit::ModificationInfo = Arrayclass.new(%w(modified_peptide mod_aminoacid_masses mod_nterm_mass mod_cterm_mass))
|
|
1383
1383
|
|
|
1384
1384
|
# Positions and masses of modifications
|
|
1385
1385
|
class Sequest::PepXML::SearchHit::ModificationInfo
|
|
@@ -1455,4 +1455,4 @@ class Sequest::PepXML::SearchHit::ModificationInfo
|
|
|
1455
1455
|
# </modification_info>
|
|
1456
1456
|
end
|
|
1457
1457
|
|
|
1458
|
-
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass =
|
|
1458
|
+
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = Arrayclass.new(%w(position mass))
|