ms-msrun 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+
2
module Ms
  # Members of the Ms::Precursor struct, in positional order:
  #   mz            - m/z of the precursor ion
  #   intensity     - intensity of the precursor (may be nil until looked up)
  #   parent        - references a scan
  #   charge_states - the possible charge states of the precursor
  PrecursorAtts = [:mz, :intensity, :parent, :charge_states]
end

Ms::Precursor = Struct.new(*Ms::PrecursorAtts)

# Precursor ion of a scan. The struct-generated +intensity+ reader is replaced
# by one that looks the value up in the parent scan's spectrum on demand.
class Ms::Precursor

  # drop the reader generated by Struct so it can be redefined below
  undef :intensity

  # Returns the precursor intensity.
  #
  # If no intensity is stored, it is looked up with
  # <tt>parent.spectrum.intensity_at_mz(mz)</tt> and stored in the struct.
  # Returns nil when the value cannot be determined: there is no parent, the
  # parent has no spectrum (i.e. the spectra were not read in), or the
  # lookup itself yields nil.
  def intensity
    if self[1].nil?
      parent_scan = self[2]
      # a precursor without a parent scan simply has no intensity to look up
      spec = parent_scan && parent_scan.spectrum
      self[1] = spec.intensity_at_mz(self[0]) if spec
    end
    self[1]
  end

  alias_method :inten, :intensity

end
@@ -0,0 +1,93 @@
1
+ require 'ms/precursor'
2
+
3
module Ms ; end

# Members of the Ms::Scan struct with their positional index:
#   0 num, 1 ms_level, 2 time, 3 start_mz, 4 end_mz, 5 num_peaks, 6 tic,
#   7 precursor, 8 spectrum
MsScanAtts = [:num, :ms_level, :time, :start_mz, :end_mz, :num_peaks, :tic, :precursor, :spectrum]

Ms::Scan = Struct.new(*MsScanAtts)

# time in seconds
# everything else in float/int

class Ms::Scan

  # Short human readable summary (scan number, ms level and time).
  def to_s
    "<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
  end

  undef_method :inspect
  # Compact inspect string: the scalar attributes that are set, the inspected
  # precursor, and only the number of m/z values of the spectrum (not its
  # contents).
  def inspect
    shown = %w(num ms_level time start_mz end_mz).map do |att|
      val = send(att.to_sym)
      "#{att}=#{val}" if val
    end.compact
    spec_display = spectrum ? spectrum.mzs.size : 'nil'
    "<Ms::Scan:#{__id__} " + shown.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
  end

  # Returns true if the fraction of the summed intensity lying below the
  # precursor m/z is greater than cutoff (the scan is then considered a +1
  # charge; otherwise > 1). A spectrum with zero total intensity also
  # returns true.
  # Algorithm from the MzXML2Search code by Jimmy Eng
  #
  # Requires both a precursor and a spectrum to be set.
  def plus1?(cutoff=0.95)
    prec_mz = precursor.mz
    mzs, intens = spectrum.mzs_and_intensities
    tic = 0.0
    below = 0.0
    mzs.zip(intens) do |mz, inten|
      below += inten if mz < prec_mz
      tic += inten
    end
    tic == 0.0 || below/tic > cutoff
  end


  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
  #
  # The precursor fields are only appended when the scan has a precursor, and
  # the intensity only when the precursor can supply one.
  def to_index_file_string
    fields = [ms_level, num, time]
    if (prec = precursor)
      fields << prec.mz
      inten = prec.intensity
      fields << inten if inten
    end
    fields.join(" ")
  end

  # Links each scan to its parent scan
  # (level 1 = no parent; level 2 = prev level 1, etc.)
  #
  # scans may contain nil entries; they are skipped. The Scan struct itself
  # has no +parent+ member, so the link is stored in the scan's precursor
  # (whose +parent+ member references a scan). Objects that do respond to
  # <tt>parent=</tt> receive the parent directly.
  # Returns scans.
  def self.add_parent_scan(scans)
    prev_scan = nil
    parent_stack = [nil]
    ## we want to set the level to be the first mslevel we come to
    first_scan = scans.find { |scan| scan }
    prev_level = first_scan ? first_scan.ms_level : 1
    scans.each do |scan|
      next unless scan  ## the first one is nil, (others?)
      level = scan.ms_level
      if prev_level < level
        # descending one level: the previous scan becomes the current parent
        parent_stack.unshift prev_scan
      elsif prev_level > level
        # climbing back up: discard one parent per level
        (prev_level - level).times { parent_stack.shift }
      end
      assign_parent(scan, parent_stack.first)
      prev_level = level
      prev_scan = scan
    end
  end

  # Stores parent on scan: directly if scan has a parent= writer, otherwise
  # on its precursor (when it has one).
  def self.assign_parent(scan, parent)
    if scan.respond_to?(:parent=)
      scan.parent = parent
    elsif (prec = scan.precursor)
      prec.parent = parent
    end
  end
  private_class_method :assign_parent

end
92
+
93
+
@@ -0,0 +1,373 @@
1
+ require 'base64'
2
+ require 'bsearch'
3
+
4
+ require 'ms/spectrum/compare'
5
+ require 'ms/spectrum/filter'
6
+
7
module Ms ; end

# A spectrum held as two parallel arrays: the m/z values and the intensity
# belonging to each m/z value. #index assumes the m/z array is sorted
# ascending (it binary searches it).
class Ms::Spectrum

  # m/z's
  attr_accessor :mzs
  # intensities
  attr_accessor :intensities

  # Shorthand for #intensities. A real method rather than an alias, so that
  # subclasses overriding #intensities are honoured here as well.
  def ints
    intensities
  end

  #######################
  ## CLASS METHODS:
  #######################

  # Builds a lazily evaluated spectrum; all arguments are handed to
  # Ms::Spectrum::LazyIO.new.
  def self.lazy(*args)
    Ms::Spectrum::LazyIO.new(*args)
  end

  # Builds a spectrum from an array of [mz, intensity] doublets.
  def self.from_peaks(ar_of_doublets)
    _mzs = []
    _ints = []
    ar_of_doublets.each do |mz, inten|
      _mzs << mz
      _ints << inten
    end
    new(_mzs, _ints)
  end

  # mz_ar and intensity_ar are parallel arrays.
  def initialize(mz_ar=[], intensity_ar=[])
    @mzs = mz_ar
    @intensities = intensity_ar
  end

  # Returns [mzs, intensities]. Goes through the readers (not the instance
  # variables) so subclasses that override #mzs / #intensities are honoured.
  def mzs_and_intensities
    [mzs, intensities]
  end

  # Two spectra are equal when their m/z arrays and their intensity arrays
  # are equal.
  def ==(other)
    mzs == other.mzs && ints == other.ints
  end

  # Returns the doublet [mz, intensity] found at array_index.
  def [](array_index)
    [mzs[array_index], intensities[array_index]]
  end

  # yields(mz, inten) across the spectrum, or array of doublets if no block
  def peaks(&block)
    (m, i) = mzs_and_intensities
    m.zip(i, &block)
  end

  alias_method :each, :peaks
  alias_method :each_peak, :peaks

  # uses index function and returns the intensity at that value
  # (nil if no m/z matches)
  def intensity_at_mz(mz)
    pos = index(mz)
    pos ? intensities[pos] : nil
  end

  # less_precise should be a float
  # precise should be a float
  #
  # True if both values round to the same number at the decimal precision of
  # less_precise.
  def equal_after_rounding?(precise, less_precise)
    # determine the precision of less_precise
    exp10 = precision_as_neg_int(less_precise)
    (precise*exp10).round == (less_precise*exp10).round
  end


  # returns the index of the first value matching that m/z.  the argument m/z
  # may be less precise than the actual m/z (rounding to the same precision
  # given) but must be at least integer precision (after rounding)
  # implemented as binary search (bsearch from the web)
  #
  # Returns nil if nothing matches.
  def index(mz)
    mz_ar = mzs
    ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
    return ind if mz_ar[ind] == mz

    # do a rounding game to see which one is it, or nil
    # find all the values rounding to the same integer in the locale
    # test each one fully in turn
    mz = mz.to_f
    mz_size = mz_ar.size
    return ind if ind < mz_size && equal_after_rounding?(mz_ar[ind], mz)

    # walk upwards until the values are at least two integers away
    (ind + 1).upto(mz_size - 1) do |up|
      mz_up = mz_ar[up]
      break if mz_up.ceil - mz.ceil >= 2
      return up if equal_after_rounding?(mz_up, mz)
    end
    # then walk downwards under the same rule
    (ind - 1).downto(0) do |dn|
      mz_dn = mz_ar[dn]
      break if mz.floor - mz_dn.floor >= 2
      return dn if equal_after_rounding?(mz_dn, mz)
    end
    nil
  end

  # returns 1 for ones place, 10 for tenths, 100 for hundredths
  # to a precision exceeding 1e-6
  def precision_as_neg_int(float) # :nodoc:
    neg_exp10 = 1
    loop do
      over = float * neg_exp10
      break if (over - over.round).abs <= 1e-6
      neg_exp10 *= 10
    end
    neg_exp10
  end


end
157
+
158
module Ms
  class Spectrum
    # Mixin for spectra that keep an io object and decode their data only
    # when it is requested. LazyIO.new picks the concrete class from the
    # number of arguments.
    module LazyIO

      # Saves the spectrum after reading it from disk (default=false).  [Set to
      # true if you want to do a few operations on a spectrum and don't want to
      # re-read from disk each time.  Use Spectrum#flush! when you think you are
      # done with it.]
      attr_accessor :save

      # sets save to true and returns the spectrum object for chaining commands
      def save!
        # an explicit receiver is required: a bare `save = true` would only
        # create a local variable
        self.save = true
        self
      end

      # Factory: 5 args build a LazyIO::Peaks (mzXML), 9 args build a
      # LazyIO::Pair (other); the arguments are passed on unchanged.
      # Raises RuntimeError for any other number of arguments.
      def self.new(*args)
        case args.size
        when 5 # mzXMl
          Ms::Spectrum::LazyIO::Peaks.new(*args)
        when 9 # other
          Ms::Spectrum::LazyIO::Pair.new(*args)
        else
          raise RuntimeError, "must give 5 or 9 args for peak data and pair data respectively"
        end
      end

    end
  end
end
183
+
184
# Lazy spectrum whose m/z values and intensities live in two separate base64
# regions of an io object. Only the io and the location/encoding of each
# region are stored; the data is decoded when it is requested.
class Ms::Spectrum::LazyIO::Pair < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  # the arrays come from the io, so the inherited writers are removed
  undef_method :mzs=, :intensities=

  # For each of the two regions: start position in io, number of bytes to
  # read, float precision and byte order (both handed to
  # Ms::Spectrum.base64_to_array).
  def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
    @save = false
    @mzs = nil
    @intensities = nil
    @io = io

    @mz_start_index, @mz_num_bytes = mz_start_index, mz_num_bytes
    @mz_precision, @mz_network_order = mz_precision, mz_network_order

    @intensity_start_index, @intensity_num_bytes = intensity_start_index, intensity_num_bytes
    @intensity_precision, @intensity_network_order = intensity_precision, intensity_network_order
  end

  # Returns the m/z array. Beware: unless a saved copy exists, the data is
  # read from the io and converted on every call. The result is kept only
  # when +save+ is set.
  def mzs
    return @mzs if @mzs
    decoded = read_floats(@mz_start_index, @mz_num_bytes, @mz_precision, @mz_network_order)
    @mzs = decoded if save
    decoded
  end

  # removes any stored data
  def flush!
    @mzs = nil
    @intensities = nil
  end

  # Returns the intensity array. Beware: unless a saved copy exists, the
  # data is read from the io and converted on every call. The result is kept
  # only when +save+ is set.
  def intensities
    return @intensities if @intensities
    decoded = read_floats(@intensity_start_index, @intensity_num_bytes, @intensity_precision, @intensity_network_order)
    @intensities = decoded if save
    decoded
  end

  private

  # Seeks to start, reads num_bytes of base64 text and decodes it into an
  # array of numbers.
  def read_floats(start, num_bytes, precision, network_order)
    @io.pos = start
    b64_string = @io.read(num_bytes)
    Ms::Spectrum.base64_to_array(b64_string, precision, network_order)
  end

end
243
+
244
# Lazy spectrum whose m/z values and intensities are interwoven in a single
# base64 region of an io object (i.e., mzXML). The decoded pair of arrays is
# held in @data only when saving is requested.
class Ms::Spectrum::LazyIO::Peaks < Ms::Spectrum
  include Ms::Spectrum::LazyIO

  # the arrays come from the io, so the inherited writers are removed
  undef_method :mzs=, :intensities=

  # start_index and num_bytes locate the base64 text inside io; precision and
  # network_order are handed to the Ms::Spectrum decoding helpers.
  def initialize(io, start_index, num_bytes, precision, network_order)
    @data = nil
    @io = io
    @start_index, @num_bytes = start_index, num_bytes
    @precision, @network_order = precision, network_order
  end

  # removes any stored data
  def flush!
    @data = nil
  end

  # returns an array of alternating values: [mz, intensity, mz, intensity]
  # (always read from the io; never stored)
  def flat_peaks
    @io.pos = @start_index
    b64_string = @io.read(@num_bytes)
    Ms::Spectrum.base64_to_array(b64_string, @precision, @network_order)
  end

  # returns two arrays: an array of m/z values and an array of intensity
  # values.  This is the preferred way to access mzXML file information under
  # lazy evaluation
  #
  # Stored data is returned as is; otherwise the io is read and decoded, and
  # the result is stored only when +save+ is set.
  def mzs_and_intensities
    return @data if @data
    @io.pos = @start_index
    b64_string = @io.read(@num_bytes)
    pair = Ms::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
    @data = pair if save
    pair
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  def mzs
    return @data.first if @data
    pair = mzs_and_intensities
    @data = pair if save
    # TODO: this can be made slightly faster
    pair.first
  end

  # when using 'io' lazy evaluation on files with m/z and intensity data
  # interwoven (i.e., mzXML) it is more efficient to call
  # 'mzs_and_intensities'
  # if you are using both mz and intensity data.
  #
  # NOTE: the save parameter shadows the +save+ attribute inside this
  # method; passing true stores the decoded data regardless of the attribute.
  def intensities(save=false)
    return @data.last if @data
    pair = mzs_and_intensities
    @data = pair if save
    # TODO: this can be made slightly faster
    pair.last
  end

end
317
+
318
+
319
module Ms
  class Spectrum
    # Helpers that decode packed floating point peak data.
    module Utils

      # String#unpack directives by byte order and float width
      Unpack_network_float = 'g*'.freeze
      Unpack_network_double = 'G*'.freeze
      Unpack_little_endian_float = 'e*'.freeze
      Unpack_little_endian_double = 'E*'.freeze

      # Unpacks an already decoded string (ready to be unpacked as floating
      # point numbers) into an array of floats.
      #
      # precision is 32 or 64 (bits per number); network_order selects the
      # network byte order directives, otherwise little endian ones.
      # Raises ArgumentError for any other precision.
      def string_to_array(string, precision=32, network_order=true)
        unpack_code =
          case precision
          when 32
            network_order ? Unpack_network_float : Unpack_little_endian_float
          when 64
            network_order ? Unpack_network_double : Unpack_little_endian_double
          else
            raise ArgumentError, "precision must be 32 or 64 (got #{precision.inspect})"
          end
        string.unpack(unpack_code)
      end

      # takes a base64 string and returns an array
      def base64_to_array(b64_string, precision=32, network_order=true)
        self.string_to_array(Base64.decode64(b64_string), precision, network_order)
      end

      # Decodes base64 peak data holding alternating m/z and intensity
      # values and returns [mz_array, intensity_array].
      def mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
        data = base64_to_array(b64_string, precision, network_order)
        mz_ar = []
        intensity_ar = []
        # even positions hold m/z values, odd positions the intensities
        data.each_with_index do |dat, ind|
          (ind.even? ? mz_ar : intensity_ar) << dat
        end
        [mz_ar, intensity_ar]
      end
    end
  end
end
368
+
369
# make the Utils helpers (string_to_array, base64_to_array, ...) callable as
# Ms::Spectrum class methods
Ms::Spectrum.extend(Ms::Spectrum::Utils)
372
+
373
+