mspire 0.6.7 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/Rakefile +5 -0
  2. data/VERSION +1 -1
  3. data/lib/cv/param.rb +25 -5
  4. data/lib/cv/referenceable_param_group_ref.rb +13 -0
  5. data/lib/cv.rb +3 -1
  6. data/lib/ms/cv/param.rb +19 -24
  7. data/lib/ms/cv/paramable.rb +42 -0
  8. data/lib/ms/mzml/activation.rb +33 -0
  9. data/lib/ms/mzml/chromatogram.rb +29 -0
  10. data/lib/ms/mzml/chromatogram_list.rb +26 -0
  11. data/lib/ms/mzml/component.rb +21 -0
  12. data/lib/ms/mzml/contact.rb +23 -0
  13. data/lib/ms/mzml/cv.rb +46 -0
  14. data/lib/ms/mzml/data_array.rb +65 -0
  15. data/lib/ms/mzml/data_array_container_like.rb +57 -0
  16. data/lib/ms/mzml/data_processing.rb +27 -0
  17. data/lib/ms/mzml/file_content.rb +21 -0
  18. data/lib/ms/mzml/file_description.rb +47 -0
  19. data/lib/ms/mzml/instrument_configuration.rb +37 -0
  20. data/lib/ms/mzml/isolation_window.rb +21 -0
  21. data/lib/ms/mzml/list.rb +23 -0
  22. data/lib/ms/mzml/precursor.rb +42 -0
  23. data/lib/ms/mzml/processing_method.rb +24 -0
  24. data/lib/ms/mzml/product.rb +22 -0
  25. data/lib/ms/mzml/referenceable_param_group.rb +40 -0
  26. data/lib/ms/mzml/run.rb +54 -0
  27. data/lib/ms/mzml/sample.rb +27 -0
  28. data/lib/ms/mzml/scan.rb +44 -0
  29. data/lib/ms/mzml/scan_list.rb +33 -0
  30. data/lib/ms/mzml/scan_settings.rb +28 -0
  31. data/lib/ms/mzml/selected_ion.rb +18 -0
  32. data/lib/ms/mzml/software.rb +28 -0
  33. data/lib/ms/mzml/source_file.rb +48 -0
  34. data/lib/ms/mzml/spectrum.rb +91 -0
  35. data/lib/ms/mzml/spectrum_list.rb +42 -0
  36. data/lib/ms/mzml.rb +173 -6
  37. data/lib/ms/quant/qspec/protein_group_comparison.rb +3 -3
  38. data/lib/ms/quant/qspec.rb +4 -4
  39. data/lib/ms/spectrum.rb +137 -260
  40. data/lib/ms/spectrum_like.rb +133 -0
  41. data/lib/ms/user_param.rb +43 -0
  42. data/lib/mspire.rb +6 -0
  43. data/obo/ms.obo +670 -121
  44. data/obo/unit.obo +23 -1
  45. data/spec/ms/cv/param_spec.rb +33 -0
  46. data/spec/ms/mzml/cv_spec.rb +17 -0
  47. data/spec/ms/mzml/file_content_spec.rb +25 -0
  48. data/spec/ms/mzml/file_description_spec.rb +34 -0
  49. data/spec/ms/mzml/referenceable_param_group_spec.rb +33 -0
  50. data/spec/ms/mzml_spec.rb +65 -4
  51. data/spec/ms/user_param_spec.rb +51 -0
  52. data/spec/mspire_spec.rb +9 -0
  53. data/spec/testfiles/ms/mzml/mspire_simulated.noidx.check.mzML +81 -0
  54. metadata +57 -21
  55. data/lib/cv/description.rb +0 -19
  56. data/lib/ms/cv/description.rb +0 -44
  57. data/lib/msplat.rb +0 -2
  58. data/spec/ms/cv/description_spec.rb +0 -60
  59. data/spec/msplat_spec.rb +0 -24
data/lib/ms/mzml.rb CHANGED
@@ -1,10 +1,26 @@
1
+ require 'mspire'
2
+ require 'builder'
1
3
  require 'nokogiri'
2
4
  require 'io/bookmark'
3
5
  require 'zlib'
4
6
  require 'ms/mzml/index_list'
5
7
  require 'ms/spectrum'
8
+ require 'ms/mzml/file_description'
9
+ require 'ms/mzml/software'
10
+ require 'ms/mzml/scan_list'
11
+ require 'ms/mzml/scan'
12
+ require 'ms/mzml/run'
13
+ require 'ms/mzml/spectrum_list'
14
+ require 'ms/mzml/chromatogram_list'
15
+ require 'ms/mzml/instrument_configuration'
16
+ require 'ms/mzml/data_processing'
17
+ require 'ms/mzml/referenceable_param_group'
18
+ require 'ms/mzml/cv'
19
+ require 'ms/mzml/sample'
6
20
 
7
21
  module MS
22
+ # Reading an mzML file:
23
+ #
8
24
  # MS::Mzml.open("somefile.mzML") do |mzml|
9
25
  # mzml.each do |spectrum|
10
26
  # scan = spectrum.scan
@@ -15,24 +31,131 @@ module MS
15
31
  # end
16
32
  # end
17
33
  # end
34
+ #
35
+ # Note that the mzml object supports random spectrum access (even if the
36
+ # mzml was not indexed):
37
+ #
38
+ # mzml[22] # retrieve spectrum at index 22
39
+ #
40
+ # Writing an mzml file from scratch:
41
+ #
42
+ # spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000127', ['MS:1000511', 1]]) do |spec|
43
+ # spec.data_arrays = [[1,2,3], [4,5,6]]
44
+ # spec.scan_list = MS::Mzml::ScanList.new do |sl|
45
+ # scan = MS::Mzml::Scan.new do |scan|
46
+ # # retention time of 40 seconds
47
+ # scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
48
+ # end
49
+ # sl << scan
50
+ # end
51
+ # end
52
+ #
53
+ # mzml = MS::Mzml.new do |mzml|
54
+ # mzml.id = 'the_little_example'
55
+ # mzml.cvs = MS::Mzml::CV::DEFAULT_CVS
56
+ # mzml.file_description = MS::Mzml::FileDescription.new do |fd|
57
+ # fd.file_content = MS::Mzml::FileContent.new
58
+ # fd.source_files << MS::Mzml::SourceFile.new
59
+ # end
60
+ # default_instrument_config = MS::Mzml::InstrumentConfiguration.new("IC",[], params: ['MS:1000031'])
61
+ # mzml.instrument_configurations << default_instrument_config
62
+ # software = MS::Mzml::Software.new
63
+ # mzml.software_list << software
64
+ # default_data_processing = MS::Mzml::DataProcessing.new("did_nothing")
65
+ # mzml.data_processing_list << default_data_processing
66
+ # mzml.run = MS::Mzml::Run.new("little_run", default_instrument_config) do |run|
67
+ # spectrum_list = MS::Mzml::SpectrumList.new(default_data_processing)
68
+ # spectrum_list.push(spec1)
69
+ # run.spectrum_list = spectrum_list
70
+ # end
71
+ # end
18
72
  class Mzml
73
+
74
+ module Default
75
+ NAMESPACE = {
76
+ :xmlns => "http://psi.hupo.org/ms/mzml",
77
+ "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
78
+ "xmlns:xsd" => "http://www.w3.org/2001/XMLSchema",
79
+ }
80
+
81
+ VERSION = '1.1.0'
82
+ end
83
+
84
+ ###############################################
85
+ # ATTRIBUTES
86
+ ###############################################
87
+
88
+ # (optional) an id for accessing from external files
89
+ attr_accessor :id
90
+
91
+ # (required) the Mzml document version
92
+ attr_accessor :version
93
+
94
+ # (optional) e.g. a PRIDE accession number
95
+ attr_accessor :accession
96
+
97
+ ###############################################
98
+ # SUBELEMENTS
99
+ ###############################################
100
+
101
+ # (required) an array of MS::Mzml::CV objects
102
+ attr_accessor :cvs
103
+
104
+ # (required) an MS::Mzml::FileDescription
105
+ attr_accessor :file_description
106
+
107
+ # (optional) an array of CV::ReferenceableParamGroup objects
108
+ attr_accessor :referenceable_param_groups
109
+
110
+ # (optional) an array of MS::Mzml::Sample objects
111
+ attr_accessor :samples
112
+
113
+ # (required) an array of MS::Mzml::Software objects
114
+ attr_accessor :software_list
115
+
116
+ # (optional) an array of MS::Mzml::ScanSettings objects
117
+ attr_accessor :scan_settings_list
118
+
119
+ # (required) an array of MS::Mzml::InstrumentConfiguration objects
120
+ attr_accessor :instrument_configurations
121
+
122
+ # (required) an array of MS::Mzml::DataProcessing objects
123
+ attr_accessor :data_processing_list
124
+
125
+ # (required) an MS::Mzml::Run object
126
+ attr_accessor :run
127
+
19
128
  module Parser
20
129
  NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
21
130
  end
22
131
  include Enumerable
23
132
 
24
- attr_accessor :filename
25
133
  attr_accessor :io
26
134
  attr_accessor :index_list
27
135
  attr_accessor :encoding
28
136
 
137
+ # arg must be an IO object for automatic index and header parsing to
138
+ # occur. If arg is a hash, then attributes are set. In addition (or
139
+ # alternatively), if a block is given it is called with self to set up the object.
140
+ #
29
141
  # io must respond_to?(:size), giving the size of the io object in bytes
30
- # which allows seeking. #get_index_list is called to get or create the
142
+ # which allows seeking. get_index_list is called to get or create the
31
143
  # index list.
32
- def initialize(io)
33
- @io = io
34
- @encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
35
- @index_list = get_index_list
144
+ def initialize(arg=nil, &block)
145
+ %w(cvs software_list instrument_configurations data_processing_list).each {|guy| self.send( guy + '=', [] ) }
146
+
147
+ case arg
148
+ when IO
149
+ @io = arg
150
+ @encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
151
+ @index_list = get_index_list
152
+ # TODO: also read in 'header' info (everything until 'run')
153
+ when Hash
154
+ arg.each {|k,v| self.send("#{k}=", v) }
155
+ end
156
+ if block
157
+ block.call(self)
158
+ end
36
159
  end
37
160
 
38
161
  class << self
@@ -191,6 +314,50 @@ module MS
191
314
  read_index_list || create_index_list
192
315
  end
193
316
 
317
+ # Because mzml files are often very large, we try to avoid storing the
318
+ # entire object tree in memory before writing.
319
+ #
320
+ # takes a filename and uses builder to write to it
321
+ # if no filename is given, returns a string
322
+ def to_xml(filename=nil)
323
+ # TODO: support indexed mzml files
324
+ io = filename ? File.open(filename, 'w') : StringIO.new
325
+ xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
326
+ xml.instruct!
327
+
328
+ mzml_atts = Default::NAMESPACE.dup
329
+ mzml_atts[:version] = @version || Default::VERSION
330
+ mzml_atts[:accession] = @accession if @accession
331
+ mzml_atts[:id] = @id if @id
332
+
333
+ xml.mzML(mzml_atts) do |mzml_n|
334
+ # the 'if' statements capture whether or not the list is required
335
+ raise "#{self.class}#cvs must have > 0 MS::Mzml::CV objects" unless @cvs.size > 0
336
+ MS::Mzml::CV.list_xml(@cvs, mzml_n)
337
+ @file_description.to_xml(mzml_n)
338
+ if @referenceable_param_groups
339
+ MS::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
340
+ end
341
+ if @samples
342
+ MS::Mzml::Sample.list_xml(@samples, mzml_n)
343
+ end
344
+ MS::Mzml::Software.list_xml(@software_list, mzml_n)
345
+ if @scan_settings_list && @scan_settings_list.size > 0
346
+ MS::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
347
+ end
348
+ icl = MS::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
349
+ MS::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
350
+ @run.to_xml(mzml_n)
351
+ end
352
+
353
+ if filename
354
+ io.close
355
+ self
356
+ else
357
+ io.string
358
+ end
359
+ end
360
+
194
361
  class ScanNumbersNotUnique < Exception
195
362
  end
196
363
  class ScanNumbersNotFound < Exception
@@ -1,14 +1,14 @@
1
1
  require 'ms/quant/protein_group_comparison'
2
2
 
3
- module Ms
3
+ module MS
4
4
  module Quant
5
5
  module ProteinGroupComparison
6
6
  end
7
7
  end
8
8
  end
9
9
 
10
- class Ms::Quant::ProteinGroupComparison::Qspec
11
- include Ms::Quant::ProteinGroupComparison
10
+ class MS::Quant::ProteinGroupComparison::Qspec
11
+ include MS::Quant::ProteinGroupComparison
12
12
 
13
13
  attr_accessor :qspec_results_struct
14
14
 
@@ -1,7 +1,7 @@
1
- module Ms ; end
2
- module Ms::Quant ; end
1
+ module MS ; end
2
+ module MS::Quant ; end
3
3
 
4
- class Ms::Quant::Qspec
4
+ class MS::Quant::Qspec
5
5
 
6
6
  # personal communication with Hyungwon Choi: "We typically use nburn=2000,
7
7
  # niter=10000, which is quite sufficient to guarantee the reproducibility of
@@ -62,7 +62,7 @@ class Ms::Quant::Qspec
62
62
 
63
63
  # writes a qspec formatted file to filename
64
64
  def write(filename)
65
- ints = Ms::Quant::Qspec.conditions_to_ints(conditions)
65
+ ints = MS::Quant::Qspec.conditions_to_ints(conditions)
66
66
  header_cats = INIT_HEADER + ints
67
67
  rows = @protname_length_pairs.map {|pair| pair.map.to_a }
68
68
  @condition_to_count_array.each do |cond,counts|
data/lib/ms/spectrum.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'ms/spectrum_like'
1
2
  require 'bsearch'
2
3
  require 'bin'
3
4
  require 'ms/peak'
@@ -6,7 +7,7 @@ module MS
6
7
  # note that a point is an [m/z, intensity] doublet.
7
8
  # A peak is considered a related string of points
8
9
  class Spectrum
9
- include Enumerable
10
+ include MS::SpectrumLike
10
11
 
11
12
  DEFAULT_MERGE = {
12
13
  :bin_width => 5,
@@ -16,284 +17,160 @@ module MS
16
17
  :split => :share
17
18
  }
18
19
 
19
- # returns a new spectrum which has been merged with the others. If the
20
- # spectra are centroided (just checks the first one and assumes the others
21
- # are the same) then it will bin the points (bin width determined by
22
- # opts[:resolution]) and then segment according to monotonicity (sharing
23
- # intensity between abutting points). The final m/z is the weighted
24
- # averaged of all the m/z's in each peak. Valid opts (with default listed
25
- # first):
26
- #
27
- # :bin_width => 5
28
- # :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
29
- # :bins => array of Bin objects for custom bins (overides other bin options)
30
- # :normalize => false if true, divides total intensity by
31
- # number of spectra
32
- # :return_data => false returns a parallel array containing
33
- # the peaks associated with each returned point
34
- # :split => :share | :greedy_y see MS::Peak#split
35
- #
36
- # The binning algorithm is the fastest possible algorithm that would allow
37
- # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
38
- def self.merge(spectra, opts={})
39
- opt = DEFAULT_MERGE.merge(opts)
40
- (spectrum, returned_data) =
41
- if spectra.first.centroided?
42
- # find the min and max across all spectra
43
- first_mzs = spectra.first.mzs
44
- min = first_mzs.first ; max = first_mzs.last
45
- spectra.each do |spectrum|
46
- mzs = spectrum.mzs
47
- min = mzs.first if mzs.first < min
48
- max = mzs.last if mzs.last > max
49
- end
50
-
51
- # Create Bin objects
52
- bins =
53
- if opt[:bins]
54
- opt[:bins]
55
- else
56
- divisions = []
57
- bin_width = opt[:bin_width]
58
- use_ppm = (opt[:bin_unit] == :ppm)
59
- current_mz = min
60
- loop do
61
- if current_mz >= max
62
- divisions << max
63
- break
64
- else
65
- divisions << current_mz
66
- current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
67
- end
68
- end
69
- # make each bin exclusive so there is no overlap
70
- bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
71
- # make the last bin *inclusive* of the terminating value
72
- bins[-1] = Bin.new(bins.last.begin, bins.last.end)
73
- bins
74
- end
75
-
76
- spectra.each do |spectrum|
77
- Bin.bin(bins, spectrum.points, &:first)
78
- end
79
-
80
- pseudo_points = bins.map do |bin|
81
- #int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
82
- [bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
83
- end
20
+ class << self
84
21
 
85
- #p_mzs = []
86
- #p_ints = []
87
- #p_num_points = []
88
- #pseudo_points.each do |psp|
89
- # p_mzs << ((psp.first.begin + psp.first.end)/2)
90
- # p_ints << psp.last
91
- # p_num_points << psp.first.data.size
92
- #end
22
+ def from_points(ar_of_doublets)
23
+ _mzs = []
24
+ _ints = []
25
+ ar_of_doublets.each do |mz, int|
26
+ _mzs << mz
27
+ _ints << int
28
+ end
29
+ self.new([_mzs, _ints])
30
+ end
93
31
 
94
- #File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
95
- #abort 'here'
96
32
 
33
+ # returns a new spectrum which has been merged with the others. If the
34
+ # spectra are centroided (just checks the first one and assumes the others
35
+ # are the same) then it will bin the points (bin width determined by
36
+ # opts[:bin_width] and opts[:bin_unit]) and then segment according to monotonicity (sharing
37
+ # intensity between abutting points). The final m/z is the weighted
38
+ # average of all the m/z's in each peak. Valid opts (with default listed
39
+ # first):
40
+ #
41
+ # :bin_width => 5
42
+ # :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
43
+ # :bins => array of Bin objects for custom bins (overrides other bin options)
44
+ # :normalize => false if true, divides total intensity by
45
+ # number of spectra
46
+ # :return_data => false returns a parallel array containing
47
+ # the peaks associated with each returned point
48
+ # :split => :share | :greedy_y see MS::Peak#split
49
+ #
50
+ # The binning algorithm is the fastest possible algorithm that would allow
51
+ # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
52
+ def merge(spectra, opts={})
53
+ opt = DEFAULT_MERGE.merge(opts)
54
+ (spectrum, returned_data) =
55
+ if spectra.first.centroided?
56
+ # find the min and max across all spectra
57
+ first_mzs = spectra.first.mzs
58
+ min = first_mzs.first ; max = first_mzs.last
59
+ spectra.each do |spectrum|
60
+ mzs = spectrum.mzs
61
+ min = mzs.first if mzs.first < min
62
+ max = mzs.last if mzs.last > max
63
+ end
97
64
 
98
- peaks = MS::Peak.new(pseudo_points).split(opt[:split])
65
+ # Create Bin objects
66
+ bins =
67
+ if opt[:bins]
68
+ opt[:bins]
69
+ else
70
+ divisions = []
71
+ bin_width = opt[:bin_width]
72
+ use_ppm = (opt[:bin_unit] == :ppm)
73
+ current_mz = min
74
+ loop do
75
+ if current_mz >= max
76
+ divisions << max
77
+ break
78
+ else
79
+ divisions << current_mz
80
+ current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
81
+ end
82
+ end
83
+ # make each bin exclusive so there is no overlap
84
+ bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
85
+ # make the last bin *inclusive* of the terminating value
86
+ bins[-1] = Bin.new(bins.last.begin, bins.last.end)
87
+ bins
88
+ end
99
89
 
100
- return_data = []
101
- _mzs = [] ; _ints = []
90
+ spectra.each do |spectrum|
91
+ Bin.bin(bins, spectrum.points, &:first)
92
+ end
102
93
 
103
- #p peaks[97]
104
- #puts "HIYA"
105
- #abort 'here'
94
+ pseudo_points = bins.map do |bin|
95
+ #int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
96
+ [bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
97
+ end
106
98
 
107
- peaks.each_with_index do |peak,i|
108
- #peaks.each do |peak|
109
- tot_intensity = peak.map(&:last).reduce(:+)
110
- return_data_per_peak = [] if opt[:return_data]
111
- weighted_mz = 0.0
112
- peak.each do |point|
113
- pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
114
- post_scaled_intensity = point[1]
115
- # some peaks may have been shared. In this case the intensity
116
- # for that peak was downweighted. However, the actually data
117
- # composing that peak is not altered when the intensity is
118
- # shared. So, to calculate a proper weighted avg we need to
119
- # downweight the intensity of any data point found within a bin
120
- # whose intensity was scaled.
121
- correction_factor =
122
- if pre_scaled_intensity != post_scaled_intensity
123
- post_scaled_intensity / pre_scaled_intensity
124
- else
125
- 1.0
99
+ #p_mzs = []
100
+ #p_ints = []
101
+ #p_num_points = []
102
+ #pseudo_points.each do |psp|
103
+ # p_mzs << ((psp.first.begin + psp.first.end)/2)
104
+ # p_ints << psp.last
105
+ # p_num_points << psp.first.data.size
106
+ #end
107
+
108
+ #File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
109
+ #abort 'here'
110
+
111
+
112
+ peaks = MS::Peak.new(pseudo_points).split(opt[:split])
113
+
114
+ return_data = []
115
+ _mzs = [] ; _ints = []
116
+
117
+ #p peaks[97]
118
+ #puts "HIYA"
119
+ #abort 'here'
120
+
121
+ peaks.each_with_index do |peak,i|
122
+ #peaks.each do |peak|
123
+ tot_intensity = peak.map(&:last).reduce(:+)
124
+ return_data_per_peak = [] if opt[:return_data]
125
+ weighted_mz = 0.0
126
+ peak.each do |point|
127
+ pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
128
+ post_scaled_intensity = point[1]
129
+ # some peaks may have been shared. In this case the intensity
130
+ # for that peak was downweighted. However, the actual data
131
+ # composing that peak is not altered when the intensity is
132
+ # shared. So, to calculate a proper weighted avg we need to
133
+ # downweight the intensity of any data point found within a bin
134
+ # whose intensity was scaled.
135
+ correction_factor =
136
+ if pre_scaled_intensity != post_scaled_intensity
137
+ post_scaled_intensity / pre_scaled_intensity
138
+ else
139
+ 1.0
140
+ end
141
+
142
+ return_data_per_peak.push(*point[0].data) if opt[:return_data]
143
+
144
+ point[0].data.each do |lil_point|
145
+ weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
126
146
  end
127
-
128
- return_data_per_peak.push(*point[0].data) if opt[:return_data]
129
-
130
- point[0].data.each do |lil_point|
131
- weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
132
147
  end
148
+ return_data << return_data_per_peak if opt[:return_data]
149
+ _mzs << weighted_mz
150
+ _ints << tot_intensity
133
151
  end
134
- return_data << return_data_per_peak if opt[:return_data]
135
- _mzs << weighted_mz
136
- _ints << tot_intensity
152
+ [Spectrum.new([_mzs, _ints]), return_data]
153
+ else
154
+ raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
137
155
  end
138
- [Spectrum.new([_mzs, _ints]), return_data]
139
- else
140
- raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
141
- end
142
-
143
- if opt[:normalize]
144
- sz = spectra.size
145
- spectrum.data[1].map! {|v| v.to_f / sz }
146
- end
147
- if opt[:return_data]
148
- $stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
149
- [spectrum, return_data]
150
- else
151
- $stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
152
- spectrum
153
- end
154
- end
155
-
156
-
157
- # boolean for if the spectrum represents centroided data or not
158
- attr_accessor :centroided
159
-
160
- def centroided?() centroided end
161
-
162
- # The underlying data store. methods are implemented so that data[0] is
163
- # the m/z's and data[1] is intensities
164
- attr_reader :data
165
-
166
-
167
-
168
-
169
- # data takes an array: [mzs, intensities]
170
- # @return [MS::Spectrum]
171
- # @param [Array] data two element array of mzs and intensities
172
- def initialize(data, centroided=true)
173
- @data = data
174
- @centroided = centroided
175
- end
176
-
177
- def self.from_points(ar_of_doublets)
178
- _mzs = []
179
- _ints = []
180
- ar_of_doublets.each do |mz, int|
181
- _mzs << mz
182
- _ints << int
183
- end
184
- self.new([_mzs, _ints])
185
- end
186
-
187
- # found by querying the size of the data store. This should almost always
188
- # be 2 (m/z and intensities)
189
- def size
190
- @data.size
191
- end
192
-
193
- def ==(other)
194
- mzs == other.mzs && intensities == other.intensities
195
- end
196
-
197
- # An array of the mz data.
198
- def mzs
199
- @data[0]
200
- end
201
-
202
- # An array of the intensities data, corresponding to mzs.
203
- def intensities
204
- @data[1]
205
- end
206
-
207
- def mzs_and_intensities
208
- [@data[0], @data[1]]
209
- end
210
-
211
- # retrieve an m/z and intensity doublet at that index
212
- def [](array_index)
213
- [@data[0][array_index], @data[1][array_index]]
214
- end
215
-
216
- # yields(mz, inten) across the spectrum, or array of doublets if no block
217
- def points(&block)
218
- @data[0].zip(@data[1], &block)
219
- end
220
-
221
- alias_method :each, :points
222
- alias_method :each_point, :points
223
-
224
- # if the mzs and intensities are the same then the spectra are considered
225
- # equal
226
- def ==(other)
227
- mzs == other.mzs && intensities == other.intensities
228
- end
229
-
230
- # returns a new spectrum whose intensities have been normalized by the tic
231
- # of another given value
232
- def normalize(norm_by=:tic)
233
- norm_by = tic if norm_by == :tic
234
- MS::Spectrum.new([self.mzs, self.intensities.map {|v| v / norm_by }])
235
- end
236
-
237
- def tic
238
- self.intensities.reduce(:+)
239
- end
240
-
241
- # ensures that the m/z values are monotonically ascending (some
242
- # instruments are bad about this)
243
- # returns self
244
- def sort!
245
- _points = points.to_a
246
- _points.sort!
247
- _points.each_with_index {|(mz,int), i| @data[0][i] = mz ; @data[1][i] = int }
248
- self
249
- end
250
-
251
- # returns the m/z that is closest to the value, favoring the lower m/z in
252
- # the case of a tie. Uses a binary search.
253
- def find_nearest(val)
254
- mzs[find_nearest_index(val)]
255
- end
256
-
257
- # same as find_nearest but returns the index of the point
258
- def find_nearest_index(val)
259
- find_all_nearest_index(val).first
260
- end
261
156
 
262
- def find_all_nearest_index(val)
263
- _mzs = mzs
264
- index = _mzs.bsearch_lower_boundary {|v| v <=> val }
265
- if index == _mzs.size
266
- [_mzs.size-1]
267
- else
268
- # if the previous m/z diff is smaller, use it
269
- if index == 0
270
- [index]
157
+ if opt[:normalize]
158
+ sz = spectra.size
159
+ spectrum.data[1].map! {|v| v.to_f / sz }
160
+ end
161
+ if opt[:return_data]
162
+ $stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
163
+ [spectrum, return_data]
271
164
  else
272
- case (val - _mzs[index-1]).abs <=> (_mzs[index] - val).abs
273
- when -1
274
- [index-1]
275
- when 0
276
- [index-1, index]
277
- when 1
278
- [index]
279
- end
165
+ $stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
166
+ spectrum
280
167
  end
281
168
  end
282
- end
283
-
284
- def find_all_nearest(val)
285
- find_all_nearest_index(val).map {|i| mzs[i] }
286
- end
287
169
 
288
- # uses MS::Spectrum.merge
289
- def merge(other_spectra, opts={})
290
- MS::Spectrum.merge([self, *other_spectra], opts)
291
170
  end
292
-
293
-
294
171
  end
295
172
  end
296
173
 
297
174
 
298
-
175
+
299
176