ms-msrun 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ms/spectrum.rb DELETED
@@ -1,373 +0,0 @@
1
- require 'base64'
2
- require 'bsearch'
3
-
4
- require 'ms/spectrum/compare'
5
- require 'ms/spectrum/filter'
6
-
7
module Ms; end

# A spectrum held as two parallel arrays: m/z values and the intensity
# recorded at each m/z.
class Ms::Spectrum

  # m/z's
  attr_accessor :mzs
  # intensities
  attr_accessor :intensities

  # Shorthand for #intensities.
  #
  # This is a real method rather than an alias_method: an alias is bound to
  # the reader defined in this class, so a subclass that overrides
  # #intensities would be bypassed by #ints (and therefore by #==).
  def ints
    intensities
  end

  #######################
  ## CLASS METHODS:
  #######################

  # Forwards all arguments to Ms::Spectrum::LazyIO.new.
  def self.lazy(*args)
    Ms::Spectrum::LazyIO.new(*args)
  end

  # Builds a spectrum from an array of [mz, intensity] doublets.
  def self.from_peaks(ar_of_doublets)
    mz_values = []
    intensity_values = []
    ar_of_doublets.each do |mz, intensity|
      mz_values << mz
      intensity_values << intensity
    end
    new(mz_values, intensity_values)
  end

  # mz_ar and intensity_ar are stored as given (not copied).
  def initialize(mz_ar=[], intensity_ar=[])
    @mzs = mz_ar
    @intensities = intensity_ar
  end

  # returns [mzs, intensities]
  def mzs_and_intensities
    [@mzs, @intensities]
  end

  # Two spectra are equal when both their m/z arrays and intensity arrays are
  # equal. Objects that do not look like a spectrum (e.g. nil) compare as
  # unequal instead of raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:mzs) && other.respond_to?(:ints)
    mzs == other.mzs && ints == other.ints
  end

  # returns [mz, intensity] at the given array index
  def [](array_index)
    [mzs[array_index], intensities[array_index]]
  end

  # yields(mz, inten) across the spectrum, or array of doublets if no block
  def peaks(&block)
    mz_values, intensity_values = mzs_and_intensities
    mz_values.zip(intensity_values, &block)
  end

  alias_method :each, :peaks
  alias_method :each_peak, :peaks

  # uses index function and returns the intensity at that value (nil if no
  # m/z matches)
  def intensity_at_mz(mz)
    found = index(mz)
    found && intensities[found]
  end

  # less_precise should be a float
  # precise should be a float
  #
  # true if both values round to the same number at the decimal precision of
  # less_precise
  def equal_after_rounding?(precise, less_precise)
    # determine the precision of less_precise
    exp10 = precision_as_neg_int(less_precise)
    (precise * exp10).round == (less_precise * exp10).round
  end

  # returns the index of the first value matching that m/z.  the argument m/z
  # may be less precise than the actual m/z (rounding to the same precision
  # given) but must be at least integer precision (after rounding)
  # implemented as binary search (bsearch_lower_boundary is not defined in
  # this file; it is expected on the m/z array, see the file's
  # require 'bsearch').  returns nil if nothing matches.
  def index(mz)
    mz_ar = mzs  # read once: subclasses may decode the array on every call
    ind = mz_ar.bsearch_lower_boundary { |x| x <=> mz }
    return ind if mz_ar[ind] == mz

    # no exact hit: test the neighbors that could round to the requested value
    mz = mz.to_f
    mz_size = mz_ar.size
    return ind if ind < mz_size && equal_after_rounding?(mz_ar[ind], mz)

    # walk upward until the values are too large to round to mz
    (ind + 1).upto(mz_size - 1) do |up|
      mz_up = mz_ar[up]
      break if mz_up.ceil - mz.ceil >= 2
      return up if equal_after_rounding?(mz_up, mz)
    end

    # then walk downward until the values are too small to round to mz
    (ind - 1).downto(0) do |dn|
      mz_dn = mz_ar[dn]
      break if mz.floor - mz_dn.floor >= 2
      return dn if equal_after_rounding?(mz_dn, mz)
    end

    nil
  end

  # returns 1 for ones place, 10 for tenths, 100 for hundredths
  # to a precision exceeding 1e-6
  def precision_as_neg_int(float) # :nodoc:
    neg_exp10 = 1
    loop do
      over = float * neg_exp10
      break if (over - over.round).abs <= 1e-6
      neg_exp10 *= 10
    end
    neg_exp10
  end

end
157
-
158
module Ms
  class Spectrum
    # Behavior shared by the lazily evaluated spectrum classes, plus a factory
    # (LazyIO.new) that picks the concrete class from the number of arguments.
    module LazyIO

      # Saves the spectrum after reading it from disk (default=false).  [Set to
      # true if you want to do a few operations on a spectrum and don't want to
      # re-read from disk each time.  Use Spectrum#flush! when you think you are
      # done with it.]
      attr_accessor :save

      # sets save to true and returns the spectrum object for chaining commands
      def save!
        # the explicit receiver is required: a bare `save = true` only creates
        # a local variable and leaves the attribute untouched
        self.save = true
        self
      end

      # Builds the lazy spectrum matching the argument count:
      # 5 args:: Ms::Spectrum::LazyIO::Peaks (mzXML)
      # 9 args:: Ms::Spectrum::LazyIO::Pair (other)
      # Raises ArgumentError for any other number of arguments.
      def self.new(*args)
        case args.size
        when 5 # mzXML
          Ms::Spectrum::LazyIO::Peaks.new(*args)
        when 9 # other
          Ms::Spectrum::LazyIO::Pair.new(*args)
        else
          raise ArgumentError, "must give 5 or 9 args for peak data and pair data respectively (got #{args.size})"
        end
      end

    end
  end
end
183
-
184
# Lazy spectrum whose m/z data and intensity data live in two separate base64
# regions of an io object.  Only the io and the location/encoding of each
# region are kept; the arrays are decoded when they are asked for.
class Ms::Spectrum::LazyIO::Pair < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  undef mzs=
  undef intensities=

  def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
    @io = io
    @save = false
    @mzs = nil
    @intensities = nil

    @mz_start_index, @mz_num_bytes = mz_start_index, mz_num_bytes
    @mz_precision, @mz_network_order = mz_precision, mz_network_order

    @intensity_start_index, @intensity_num_bytes = intensity_start_index, intensity_num_bytes
    @intensity_precision, @intensity_network_order = intensity_precision, intensity_network_order
  end

  # beware that this converts the information on disk every time it is called
  # (unless save is set, in which case the first result is kept).
  def mzs
    return @mzs if @mzs
    decoded = decode_region(@mz_start_index, @mz_num_bytes, @mz_precision, @mz_network_order)
    @mzs = decoded if save
    decoded
  end

  # drops any kept arrays so the next access reads from the io again
  def flush!
    @mzs = @intensities = nil
  end

  # beware that this converts the information on disk every time it is called
  # (unless save is set, in which case the first result is kept).
  def intensities
    return @intensities if @intensities
    decoded = decode_region(@intensity_start_index, @intensity_num_bytes, @intensity_precision, @intensity_network_order)
    @intensities = decoded if save
    decoded
  end

  private

  # seeks to start_index, reads num_bytes of base64 text and decodes it
  def decode_region(start_index, num_bytes, precision, network_order)
    @io.pos = start_index
    b64_string = @io.read(num_bytes)
    Ms::Spectrum.base64_to_array(b64_string, precision, network_order)
  end

end
243
-
244
# Lazy spectrum whose m/z and intensity values are interwoven in a single
# base64 region of an io object (i.e., mzXML).  Only the io and the
# location/encoding of the region are kept; the data is decoded on request.
class Ms::Spectrum::LazyIO::Peaks < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  undef mzs=
  undef intensities=

  def initialize(io, start_index, num_bytes, precision, network_order)
    @save = false  # explicit default, as in Ms::Spectrum::LazyIO::Pair
    @data = nil
    @io = io
    @start_index = start_index
    @num_bytes = num_bytes
    @precision = precision
    @network_order = network_order
  end

  # removes any stored data
  def flush!
    @data = nil
  end

  # returns an array of alternating values: [mz, intensity, mz, intensity]
  # (always read from the io; never stored)
  def flat_peaks
    @io.pos = @start_index
    Ms::Spectrum.base64_to_array(@io.read(@num_bytes), @precision, @network_order)
  end

  # returns two arrays: an array of m/z values and an array of intensity
  # values.  This is the preferred way to access mzXML file information under
  # lazy evaluation.  The result is kept for later calls only when save is
  # set.
  def mzs_and_intensities
    return @data if @data
    @io.pos = @start_index
    b64_string = @io.read(@num_bytes)
    data = Ms::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
    @data = data if save
    data
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  def mzs
    # mzs_and_intensities already returns stored data and stores fresh data
    # when save is set, so nothing more is needed here
    mzs_and_intensities.first
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call
  # 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  #
  # NOTE: the +save+ parameter shadows the save attribute inside this method;
  # passing true stores the decoded data for this call even when the
  # attribute is false.
  def intensities(save=false)
    data = mzs_and_intensities
    @data = data if save
    data.last
  end

end
317
-
318
-
319
module Ms
  class Spectrum
    # Helpers that turn packed (optionally base64 encoded) floating point data
    # into Ruby arrays.
    module Utils

      Unpack_network_float = 'g*'
      Unpack_network_double = 'G*'
      Unpack_little_endian_float = 'e*'
      Unpack_little_endian_double = 'E*'

      # an already decoded string (ready to be unpacked as floating point numbers)
      #
      # precision:: 32 or 64 (bits per value)
      # network_order:: truthy for network byte order, otherwise little endian
      #
      # Raises ArgumentError for any other precision (previously the unpack
      # directive silently became nil and String#unpack failed instead).
      def string_to_array(string, precision=32, network_order=true)
        unpack_code =
          case precision
          when 32
            network_order ? Unpack_network_float : Unpack_little_endian_float
          when 64
            network_order ? Unpack_network_double : Unpack_little_endian_double
          else
            raise ArgumentError, "precision must be 32 or 64 (got #{precision.inspect})"
          end
        string.unpack(unpack_code)
      end

      # takes a base64 string and returns an array
      def base64_to_array(b64_string, precision=32, network_order=true)
        string_to_array(Base64.decode64(b64_string), precision, network_order)
      end

      # takes a base64 string of interwoven values (mz, intensity, mz,
      # intensity, ...) and returns [mz_array, intensity_array]
      def mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
        data = base64_to_array(b64_string, precision, network_order)
        mz_ar = []
        intensity_ar = []
        data.each_slice(2) do |pair|
          mz_ar << pair[0]
          # a trailing m/z without a partner contributes no intensity
          intensity_ar << pair[1] if pair.size == 2
        end
        [mz_ar, intensity_ar]
      end
    end
  end
end
368
-
369
# make the Utils helpers callable directly on the class
# (e.g. Ms::Spectrum.base64_to_array)
Ms::Spectrum.extend(Ms::Spectrum::Utils)
372
-
373
-