ms-msrun 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +3 -4
- data/Rakefile +20 -5
- data/bin/base64_to_array.rb +3 -4
- data/bin/ms_to_obiwarp.rb +6 -15
- data/bin/ms_to_search.rb +15 -22
- data/lib/lmat.rb +47 -53
- data/lib/ms/msrun.rb +98 -108
- data/lib/ms/msrun/axml/mzxml.rb +6 -8
- data/lib/ms/msrun/index.rb +130 -0
- data/lib/ms/msrun/mzxml.rb +12 -0
- data/lib/ms/msrun/nokogiri.rb +12 -0
- data/lib/ms/msrun/nokogiri/mzxml.rb +168 -0
- data/lib/ms/msrun/regexp/mzxml.rb +126 -0
- data/lib/ms/msrun/search.rb +25 -21
- data/lib/ms/msrun/sha1.rb +36 -0
- data/lib/ms/mzxml.rb +12 -0
- data/lib/ms/precursor.rb +3 -2
- data/lib/ms/precursor/lazy_parent.rb +28 -0
- data/lib/ms/scan.rb +2 -29
- data/lib/ms/spectrum/compare.rb +42 -2
- data/lib/ms/spectrum/filter.rb +1 -1
- data/spec/ms/msrun/index_spec.rb +60 -0
- data/spec/ms/msrun/scan_spec.rb +78 -0
- data/spec/ms/msrun/search_spec.rb +6 -7
- data/spec/ms/msrun/sha1_spec.rb +23 -0
- data/spec/ms/msrun_spec.rb +111 -3
- data/spec/ms/scan_spec.rb +2 -2
- data/spec/ms/spectrum/compare_spec.rb +13 -6
- data/spec/ms/spectrum/filter_spec.rb +3 -3
- metadata +42 -21
- data/lib/bsearch.rb +0 -120
- data/lib/ms/spectrum.rb +0 -373
data/lib/ms/spectrum.rb
DELETED
@@ -1,373 +0,0 @@
|
|
1
|
-
require 'base64'
|
2
|
-
require 'bsearch'
|
3
|
-
|
4
|
-
require 'ms/spectrum/compare'
|
5
|
-
require 'ms/spectrum/filter'
|
6
|
-
|
7
|
-
module Ms ; end
|
8
|
-
|
9
|
-
class Ms::Spectrum
|
10
|
-
|
11
|
-
# m/z's
|
12
|
-
attr_accessor :mzs
|
13
|
-
# intensities
|
14
|
-
attr_accessor :intensities
|
15
|
-
|
16
|
-
alias_method :ints, :intensities
|
17
|
-
|
18
|
-
#######################
|
19
|
-
## CLASS METHODS:
|
20
|
-
#######################
|
21
|
-
|
22
|
-
# Forwards every argument, unchanged, to Ms::Spectrum::LazyIO.new and returns
# whatever that call returns.
def self.lazy(*args)
  lazy_spectrum = Ms::Spectrum::LazyIO.new(*args)
  lazy_spectrum
end
|
25
|
-
|
26
|
-
# Builds a spectrum from an array of [mz, intensity] doublets.
#
# The doublets are split, in order, into one array of m/z values and one array
# of intensities, which are then handed to +new+.
def self.from_peaks(ar_of_doublets)
  mz_values = []
  intensity_values = []
  ar_of_doublets.each do |mz, intensity|
    mz_values.push(mz)
    intensity_values.push(intensity)
  end
  new(mz_values, intensity_values)
end
|
35
|
-
|
36
|
-
# Stores the given m/z array and intensity array; both default to a new empty
# array.
def initialize(mz_ar=[], intensity_ar=[])
  @mzs, @intensities = mz_ar, intensity_ar
end
|
40
|
-
|
41
|
-
# Returns the stored m/z array and intensity array as a two-element array:
# [mzs, intensities].
def mzs_and_intensities
  pair = [@mzs, @intensities]
  pair
end
|
44
|
-
|
45
|
-
# Two spectra are equal when their m/z arrays are equal and their intensity
# arrays are equal.
#
# The comparison goes through #mzs and #intensities rather than the +ints+
# alias: alias_method binds +ints+ to the reader that existed when the alias
# was created, so a subclass that overrides #intensities would be bypassed.
#
# Returns false (instead of raising NoMethodError) when +other+ does not expose
# both readers, e.g. nil or an unrelated object.
def ==(other)
  return false unless other.respond_to?(:mzs) && other.respond_to?(:intensities)

  mzs == other.mzs && intensities == other.intensities
end
|
48
|
-
|
49
|
-
# Returns the [mz, intensity] doublet found at +array_index+ in the m/z and
# intensity arrays.
def [](array_index)
  mz = mzs[array_index]
  intensity = intensities[array_index]
  [mz, intensity]
end
|
52
|
-
|
53
|
-
# Walks the spectrum as (mz, intensity) pairs.
#
# With a block, each pair is yielded in order and the result of Array#zip with
# a block (nil) is returned. Without a block, an array of [mz, intensity]
# doublets is returned.
def peaks(&block)
  mz_values, intensity_values = mzs_and_intensities
  mz_values.zip(intensity_values, &block)
end
|
58
|
-
|
59
|
-
alias_method :each, :peaks
|
60
|
-
alias_method :each_peak, :peaks
|
61
|
-
|
62
|
-
# Looks up +mz+ with #index and returns the intensity stored at that position,
# or nil when #index finds no match.
def intensity_at_mz(mz)
  position = index(mz)
  position ? intensities[position] : nil
end
|
70
|
-
|
71
|
-
# Tests whether +precise+ equals +less_precise+ once both are rounded to the
# number of decimal places carried by +less_precise+.
#
# precise::      a float
# less_precise:: a float; its precision (as reported by #precision_as_neg_int)
#                decides the rounding scale
#
# Returns true or false.
def equal_after_rounding?(precise, less_precise)
  # scale is 1, 10, 100, ... so multiplying moves the last significant decimal
  # of less_precise into the ones place before rounding.
  scale = precision_as_neg_int(less_precise)
  (precise * scale).round == (less_precise * scale).round
end
|
82
|
-
|
83
|
-
|
84
|
-
# Returns the index of the first value matching +mz+, or nil when none matches.
#
# +mz+ may be less precise than the stored m/z (values are compared after
# rounding to the precision of +mz+) but must be at least integer precision
# after rounding.
#
# The starting position comes from Array#bsearch_lower_boundary, which is not
# defined in this file (presumably supplied by the 'bsearch' library required
# at the top -- confirm). Search order:
# 1. an exact match at the starting position;
# 2. a rounding match at the starting position;
# 3. rounding matches above it, stopping once ceil(value) - ceil(mz) >= 2;
# 4. rounding matches below it, stopping once floor(mz) - floor(value) >= 2.
def index(mz)
  mz_ar = mzs
  start = mz_ar.bsearch_lower_boundary { |x| x <=> mz }
  return start if mz_ar[start] == mz

  mz = mz.to_f
  mz_size = mz_ar.size
  return start if start < mz_size && equal_after_rounding?(mz_ar[start], mz)

  # walk upward from the starting position
  ((start + 1)...mz_size).each do |up|
    candidate = mz_ar[up]
    break if candidate.ceil - mz.ceil >= 2
    return up if equal_after_rounding?(candidate, mz)
  end

  # walk downward from the starting position
  (start - 1).downto(0) do |dn|
    candidate = mz_ar[dn]
    break if mz.floor - candidate.floor >= 2
    return dn if equal_after_rounding?(candidate, mz)
  end

  nil
end
|
139
|
-
|
140
|
-
# Returns the power of ten that shifts the last decimal place of +float+ into
# the ones place: 1 for ones, 10 for tenths, 100 for hundredths, and so on.
# A scaled value counts as whole once it is within 1e-6 of its rounded value.
def precision_as_neg_int(float) # :nodoc:
  scale = 1
  scale *= 10 until ((float * scale) - (float * scale).round).abs <= 1e-6
  scale
end
|
154
|
-
|
155
|
-
|
156
|
-
end
|
157
|
-
|
158
|
-
module Ms::Spectrum::LazyIO
|
159
|
-
|
160
|
-
# Saves the spectrum after reading it from disk (default=false). [Set to
|
161
|
-
# true if you want to do a few operations on a spectrum and don't want to
|
162
|
-
# re-read from disk each time. Use Spectrum#flush! when you think you are
|
163
|
-
# done with it.]
|
164
|
-
attr_accessor :save
|
165
|
-
|
166
|
-
# Sets the +save+ attribute to true and returns the spectrum itself so calls
# can be chained.
def save!
  # Must go through the writer: a bare `save = true` would only create a local
  # variable and leave the attribute untouched.
  self.save = true
  self
end
|
171
|
-
|
172
|
-
# Factory that picks the lazy spectrum class from the number of arguments:
#
# * 5 arguments -> Ms::Spectrum::LazyIO::Peaks (mzXML)
# * 9 arguments -> Ms::Spectrum::LazyIO::Pair (other)
#
# All arguments are forwarded unchanged to the chosen class.
#
# Raises RuntimeError for any other argument count.
def self.new(*args)
  case args.size
  when 5 # mzXML
    Ms::Spectrum::LazyIO::Peaks.new(*args)
  when 9 # other
    Ms::Spectrum::LazyIO::Pair.new(*args)
  else
    raise RuntimeError, "must give 5 or 9 args for peak data and pair data respectively"
  end
end
|
181
|
-
|
182
|
-
end
|
183
|
-
|
184
|
-
# stores an io object and the start and end indices and only evaluates the
|
185
|
-
# spectrum when information is requested
|
186
|
-
class Ms::Spectrum::LazyIO::Pair < Ms::Spectrum
|
187
|
-
include Ms::Spectrum::LazyIO
|
188
|
-
|
189
|
-
undef mzs=
|
190
|
-
undef intensities=
|
191
|
-
|
192
|
-
# Records where the m/z data and the intensity data live inside +io+ without
# reading anything yet.
#
# For each of the two data sections the caller supplies a start index, a byte
# count, a precision and a network-order flag; these are later passed to
# Ms::Spectrum.base64_to_array by #mzs and #intensities. Saving starts off
# disabled and no decoded data is held.
def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
  @io = io
  @save = false
  @mzs = @intensities = nil

  @mz_start_index, @mz_num_bytes = mz_start_index, mz_num_bytes
  @mz_precision, @mz_network_order = mz_precision, mz_network_order

  @intensity_start_index, @intensity_num_bytes = intensity_start_index, intensity_num_bytes
  @intensity_precision, @intensity_network_order = intensity_precision, intensity_network_order
end
|
209
|
-
|
210
|
-
# Returns the m/z array.
#
# A previously saved array is returned directly. Otherwise the base64 section
# is read from the io and decoded on every call; the decoded array is kept
# only when +save+ is set.
def mzs
  return @mzs if @mzs

  @io.pos = @mz_start_index
  encoded = @io.read(@mz_num_bytes)
  decoded = Ms::Spectrum.base64_to_array(encoded, @mz_precision, @mz_network_order)
  @mzs = decoded if save
  decoded
end
|
222
|
-
|
223
|
-
# Drops any saved m/z and intensity arrays so the next access reads from the
# io again.
def flush!
  @mzs = @intensities = nil
end
|
227
|
-
|
228
|
-
# Returns the intensity array.
#
# A previously saved array is returned directly. Otherwise the base64 section
# is read from the io and decoded on every call; the decoded array is kept
# only when +save+ is set.
def intensities
  return @intensities if @intensities

  @io.pos = @intensity_start_index
  encoded = @io.read(@intensity_num_bytes)
  decoded = Ms::Spectrum.base64_to_array(encoded, @intensity_precision, @intensity_network_order)
  @intensities = decoded if save
  decoded
end
|
241
|
-
|
242
|
-
end
|
243
|
-
|
244
|
-
class Ms::Spectrum::LazyIO::Peaks < Ms::Spectrum
|
245
|
-
include Ms::Spectrum::LazyIO
|
246
|
-
|
247
|
-
undef mzs=
|
248
|
-
undef intensities=
|
249
|
-
|
250
|
-
# Records where the interleaved peak data lives inside +io+ without reading
# anything yet.
#
# io::            object the base64 peak section is read from
# start_index::   position assigned to io.pos before reading
# num_bytes::     number of bytes passed to io.read
# precision::     forwarded to the Ms::Spectrum decoding helpers
# network_order:: forwarded to the Ms::Spectrum decoding helpers
def initialize(io, start_index, num_bytes, precision, network_order)
  # Start with saving explicitly disabled, matching Pair#initialize, instead of
  # leaving the attribute unset.
  @save = false
  @data = nil
  @io = io
  @start_index = start_index
  @num_bytes = num_bytes
  @precision = precision
  @network_order = network_order
end
|
258
|
-
|
259
|
-
# Forgets any saved peak data so the next access reads from the io again.
def flush!
  @data = nil
end
|
263
|
-
|
264
|
-
# Reads the peak section from the io and decodes it into one flat array of
# alternating values: [mz, intensity, mz, intensity, ...]. Nothing is cached.
def flat_peaks
  @io.pos = @start_index
  encoded = @io.read(@num_bytes)
  Ms::Spectrum.base64_to_array(encoded, @precision, @network_order)
end
|
269
|
-
|
270
|
-
# Returns two arrays: the m/z values and the intensity values. This is the
# preferred way to access mzXML file information under lazy evaluation.
#
# Saved data is returned directly. Otherwise the peak section is read from the
# io and decoded on every call; the result is kept only when +save+ is set.
def mzs_and_intensities
  return @data if @data

  @io.pos = @start_index
  encoded = @io.read(@num_bytes)
  decoded = Ms::Spectrum.mzs_and_intensities_from_base64_peaks(encoded, @precision, @network_order)
  @data = decoded if save
  decoded
end
|
284
|
-
|
285
|
-
# Returns the m/z array.
#
# When using 'io' lazy evaluation on files with m/z and intensity data
# interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
# if you are using both mz and intensity data.
#
# Saved data is used when present; freshly decoded data is kept only when
# +save+ is set.
# TODO: this can be made slightly faster
def mzs
  return @data.first if @data

  decoded = mzs_and_intensities
  @data = decoded if save
  decoded.first
end
|
299
|
-
|
300
|
-
# Returns the intensity array.
#
# When using 'io' lazy evaluation on files with m/z and intensity data
# interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
# if you are using both mz and intensity data.
#
# save:: whether to keep the decoded data for later calls. Defaults to the
#        object's +save+ attribute so this method agrees with #mzs; passing
#        true or false explicitly still overrides it for this call.
# TODO: this can be made slightly faster
def intensities(save=self.save)
  return @data.last if @data

  decoded = mzs_and_intensities
  @data = decoded if save
  decoded.last
end
|
315
|
-
|
316
|
-
end
|
317
|
-
|
318
|
-
|
319
|
-
module Ms::Spectrum::Utils
|
320
|
-
|
321
|
-
Unpack_network_float = 'g*'
|
322
|
-
Unpack_network_double = 'G*'
|
323
|
-
Unpack_little_endian_float = 'e*'
|
324
|
-
Unpack_little_endian_double = 'E*'
|
325
|
-
|
326
|
-
# Unpacks an already decoded string (ready to be unpacked as floating point
# numbers) into an array of floats.
#
# string::        the raw binary string
# precision::     32 (single precision) or 64 (double precision)
# network_order:: truthy for network byte order, falsy for little endian
#
# Raises ArgumentError when +precision+ is neither 32 nor 64; without this
# check the unpack directive would be nil and String#unpack would fail with an
# unrelated-looking TypeError.
def string_to_array(string, precision=32, network_order=true)
  unpack_code =
    case precision
    when 32
      network_order ? Unpack_network_float : Unpack_little_endian_float
    when 64
      network_order ? Unpack_network_double : Unpack_little_endian_double
    else
      raise ArgumentError, "precision must be 32 or 64 (got #{precision.inspect})"
    end
  string.unpack(unpack_code)
end
|
344
|
-
|
345
|
-
# Decodes a base64 string and unpacks the resulting bytes into an array via
# #string_to_array, passing +precision+ and +network_order+ straight through.
def base64_to_array(b64_string, precision=32, network_order=true)
  raw_bytes = Base64.decode64(b64_string)
  string_to_array(raw_bytes, precision, network_order)
end
|
349
|
-
|
350
|
-
# Decodes a base64 peak list whose values alternate mz, intensity, mz,
# intensity, ... and splits it into two parallel arrays.
#
# b64_string::    the base64 encoded peaks
# precision::     forwarded to #base64_to_array
# network_order:: forwarded to #base64_to_array
#
# Returns [mz_array, intensity_array]. The two arrays always have the same
# length: a trailing m/z value with no partner intensity (odd number of
# decoded values) is ignored.
def mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
  data = base64_to_array(b64_string, precision, network_order)
  pair_count = data.size / 2
  mz_ar = []
  intensity_ar = []
  # Only complete (mz, intensity) pairs are consumed.
  data.first(pair_count * 2).each_slice(2) do |mz, intensity|
    mz_ar << mz
    intensity_ar << intensity
  end
  [mz_ar, intensity_ar]
end
|
367
|
-
end
|
368
|
-
|
369
|
-
class Ms::Spectrum
|
370
|
-
extend Utils
|
371
|
-
end
|
372
|
-
|
373
|
-
|