ms-msrun 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ms/spectrum.rb DELETED
@@ -1,373 +0,0 @@
1
- require 'base64'
2
- require 'bsearch'
3
-
4
- require 'ms/spectrum/compare'
5
- require 'ms/spectrum/filter'
6
-
7
module Ms ; end

# A mass spectrum held as two parallel arrays: m/z values and the intensity
# recorded at each of them.  Element i of +mzs+ pairs with element i of
# +intensities+.
class Ms::Spectrum

  # m/z's
  attr_accessor :mzs
  # intensities
  attr_accessor :intensities

  #######################
  ## CLASS METHODS:
  #######################

  # Forwards all arguments to Ms::Spectrum::LazyIO.new and returns whatever
  # it builds.
  def self.lazy(*args)
    Ms::Spectrum::LazyIO.new(*args)
  end

  # Builds a spectrum from an array of [mz, intensity] doublets.
  def self.from_peaks(ar_of_doublets)
    _mzs = []
    _ints = []
    ar_of_doublets.each do |mz, int|
      _mzs << mz
      _ints << int
    end
    new(_mzs, _ints)
  end

  # mz_ar::        array of m/z values
  # intensity_ar:: array of intensities (parallel to mz_ar)
  def initialize(mz_ar=[], intensity_ar=[])
    @mzs = mz_ar
    @intensities = intensity_ar
  end

  # Short name for #intensities.  Written as a real method (not
  # alias_method) so that a subclass overriding #intensities is honoured;
  # an alias would stay bound to the plain attribute reader.
  def ints
    intensities
  end

  # Returns [mzs, intensities].  Goes through the accessors rather than the
  # instance variables so subclasses that override the accessors still
  # return real data here.
  def mzs_and_intensities
    [mzs, intensities]
  end

  # Two spectra are equal when both their m/z arrays and their intensity
  # arrays are equal.
  def ==(other)
    mzs == other.mzs && ints == other.ints
  end

  # Returns the [mz, intensity] doublet stored at array_index.
  def [](array_index)
    [mzs[array_index], intensities[array_index]]
  end

  # yields(mz, inten) across the spectrum, or array of doublets if no block
  def peaks(&block)
    (m, i) = mzs_and_intensities
    m.zip(i, &block)
  end

  alias_method :each, :peaks
  alias_method :each_peak, :peaks

  # uses index function and returns the intensity at that value
  # (nil when no m/z matches)
  def intensity_at_mz(mz)
    x = index(mz)
    x ? intensities[x] : nil
  end

  # less_precise should be a float
  # precise should be a float
  #
  # True when +precise+, rounded to the number of decimal places carried by
  # +less_precise+, is the same number as +less_precise+.
  def equal_after_rounding?(precise, less_precise)
    # determine the precision of less_precise
    exp10 = precision_as_neg_int(less_precise)
    (precise*exp10).round == (less_precise*exp10).round
  end

  # returns the index of the first value matching that m/z.  the argument m/z
  # may be less precise than the actual m/z (rounding to the same precision
  # given) but must be at least integer precision (after rounding)
  # implemented as binary search (bsearch_lower_boundary from the 'bsearch'
  # library), so the m/z array is expected to be sorted ascending.
  # Returns nil when nothing matches.
  def index(mz)
    mz_ar = mzs
    ind = mz_ar.bsearch_lower_boundary {|x| x <=> mz }
    # exact hit at the lower boundary
    return ind if mz_ar[ind] == mz

    # do a rounding game to see which one is it, or nil:
    # test the neighbours of the boundary that could still round to mz
    mz = mz.to_f
    mz_size = mz_ar.size
    return ind if ind < mz_size && equal_after_rounding?(mz_ar[ind], mz)

    # walk upward until the candidates are too far above mz to match
    (ind + 1).upto(mz_size - 1) do |up|
      mz_up = mz_ar[up]
      break if mz_up.ceil - mz.ceil >= 2
      return up if equal_after_rounding?(mz_up, mz)
    end

    # then walk downward until the candidates are too far below mz
    (ind - 1).downto(0) do |dn|
      mz_dn = mz_ar[dn]
      break if mz.floor - mz_dn.floor >= 2
      return dn if equal_after_rounding?(mz_dn, mz)
    end

    nil
  end

  # returns 1 for ones place, 10 for tenths, 100 for hundredths
  # to a precision exceeding 1e-6
  def precision_as_neg_int(float) # :nodoc:
    neg_exp10 = 1
    # scale by ten until the scaled value is (within 1e-6) a whole number
    loop do
      over = float * neg_exp10
      break if (over - over.round).abs <= 1e-6
      neg_exp10 *= 10
    end
    neg_exp10
  end

end
157
-
158
# Mixin shared by the lazily evaluated spectrum classes, plus a factory
# (LazyIO.new) that picks the concrete class from the number of arguments.
module Ms::Spectrum::LazyIO

  # Saves the spectrum after reading it from disk (default=false).  [Set to
  # true if you want to do a few operations on a spectrum and don't want to
  # re-read from disk each time.  Use Spectrum#flush! when you think you are
  # done with it.]
  attr_accessor :save

  # sets save to true and returns the spectrum object for chaining commands
  def save!
    # an explicit receiver is required: a bare `save = true` would only
    # create a local variable and leave the attribute untouched
    self.save = true
    self
  end

  # Factory: 5 arguments build a LazyIO::Peaks (mzXML), 9 arguments build a
  # LazyIO::Pair.  Any other argument count raises RuntimeError.
  def self.new(*args)
    case args.size
    when 5 # mzXMl
      Ms::Spectrum::LazyIO::Peaks.new(*args)
    when 9 # other
      Ms::Spectrum::LazyIO::Pair.new(*args)
    else
      raise RuntimeError, "must give 5 or 9 args for peak data and pair data respectively"
    end
  end

end
183
-
184
# Lazy spectrum whose m/z values and intensities live in two separate
# base64 regions of an io object.  Only the io, the byte offsets/lengths and
# the decoding parameters are kept; the arrays are decoded when asked for.
class Ms::Spectrum::LazyIO::Pair < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  # the arrays come from the io, so the inherited writers are removed
  undef mzs=
  undef intensities=

  def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
    @save = false
    @mzs = @intensities = nil
    @io = io

    @mz_start_index, @mz_num_bytes = mz_start_index, mz_num_bytes
    @mz_precision, @mz_network_order = mz_precision, mz_network_order

    @intensity_start_index, @intensity_num_bytes = intensity_start_index, intensity_num_bytes
    @intensity_precision, @intensity_network_order = intensity_precision, intensity_network_order
  end

  # Returns the m/z array.  Unless a saved copy exists this seeks the io and
  # decodes the data again on every call; the result is kept only when
  # +save+ is set.
  def mzs
    return @mzs if @mzs
    @io.pos = @mz_start_index
    encoded = @io.read(@mz_num_bytes)
    decoded = Ms::Spectrum.base64_to_array(encoded, @mz_precision, @mz_network_order)
    @mzs = decoded if save
    decoded
  end

  # drops any saved m/z and intensity arrays
  def flush!
    @mzs = @intensities = nil
  end

  # Returns the intensity array.  Unless a saved copy exists this seeks the
  # io and decodes the data again on every call; the result is kept only
  # when +save+ is set.
  def intensities
    return @intensities if @intensities
    @io.pos = @intensity_start_index
    encoded = @io.read(@intensity_num_bytes)
    decoded = Ms::Spectrum.base64_to_array(encoded, @intensity_precision, @intensity_network_order)
    @intensities = decoded if save
    decoded
  end

end
243
-
244
# Lazy spectrum whose m/z and intensity values are interwoven in a single
# base64 region of an io object (i.e., mzXML).  The data is decoded on
# request and kept in @data only when saving is requested.
class Ms::Spectrum::LazyIO::Peaks < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  # the arrays come from the io, so the inherited writers are removed
  undef mzs=
  undef intensities=

  def initialize(io, start_index, num_bytes, precision, network_order)
    @data = nil
    @io = io
    @start_index, @num_bytes = start_index, num_bytes
    @precision, @network_order = precision, network_order
  end

  # removes any stored data
  def flush!
    @data = nil
  end

  # returns an array of alternating values: [mz, intensity, mz, intensity]
  # (always read and decoded from the io; never cached)
  def flat_peaks
    @io.pos = @start_index
    encoded = @io.read(@num_bytes)
    Ms::Spectrum.base64_to_array(encoded, @precision, @network_order)
  end

  # returns two arrays: an array of m/z values and an array of intensity
  # values.  This is the preferred way to access mzXML file information under
  # lazy evaluation.  The decoded pair is stored when +save+ is set.
  def mzs_and_intensities
    return @data if @data
    @io.pos = @start_index
    encoded = @io.read(@num_bytes)
    decoded = Ms::Spectrum.mzs_and_intensities_from_base64_peaks(encoded, @precision, @network_order)
    @data = decoded if save
    decoded
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  def mzs
    return @data.first if @data
    both = mzs_and_intensities
    @data = both if save
    both.first
    # TODO: this can be made slightly faster
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call
  # 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  #
  # The +save+ argument shadows the +save+ attribute inside this method:
  # passing true stores the decoded data for this call regardless of the
  # attribute.
  def intensities(save=false)
    return @data.last if @data
    both = mzs_and_intensities
    @data = both if save
    both.last
    # TODO: this can be made slightly faster
  end

end
317
-
318
-
319
# Helpers for turning packed (and base64 encoded) floating point data into
# Ruby arrays.
module Ms::Spectrum::Utils

  # String#unpack directives: g/G are big-endian (network order) single and
  # double precision floats, e/E the little-endian equivalents.
  Unpack_network_float = 'g*'
  Unpack_network_double = 'G*'
  Unpack_little_endian_float = 'e*'
  Unpack_little_endian_double = 'E*'

  # an already decoded string (ready to be unpacked as floating point numbers)
  #
  # string::        packed binary data
  # precision::     32 or 64 (bits per number)
  # network_order:: true for big-endian data, false for little-endian
  #
  # Returns an array of floats.  Raises ArgumentError for any other
  # precision (previously this reached String#unpack with a nil format).
  def string_to_array(string, precision=32, network_order=true)
    unpack_code =
      if precision == 32
        network_order ? Unpack_network_float : Unpack_little_endian_float
      elsif precision == 64
        network_order ? Unpack_network_double : Unpack_little_endian_double
      else
        raise ArgumentError, "precision must be 32 or 64 (got #{precision.inspect})"
      end
    string.unpack(unpack_code)
  end

  # takes a base64 string and returns an array
  def base64_to_array(b64_string, precision=32, network_order=true)
    string_to_array(Base64.decode64(b64_string), precision, network_order)
  end

  # Decodes a base64 string of interwoven values (mz, intensity, mz,
  # intensity, ...) and returns [mz_array, intensity_array].  A trailing
  # value without a partner is kept as an m/z with no matching intensity.
  def mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
    data = base64_to_array(b64_string, precision, network_order)
    mz_ar = []
    intensity_ar = []
    data.each_slice(2) do |doublet|
      mz_ar << doublet[0]
      intensity_ar << doublet[1] if doublet.size == 2
    end
    [mz_ar, intensity_ar]
  end
end
368
-
369
# Make the Utils helpers callable as class methods on Ms::Spectrum
# (e.g. Ms::Spectrum.base64_to_array).
Ms::Spectrum.extend(Ms::Spectrum::Utils)
372
-
373
-