mspire 0.6.7 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +5 -0
- data/VERSION +1 -1
- data/lib/cv/param.rb +25 -5
- data/lib/cv/referenceable_param_group_ref.rb +13 -0
- data/lib/cv.rb +3 -1
- data/lib/ms/cv/param.rb +19 -24
- data/lib/ms/cv/paramable.rb +42 -0
- data/lib/ms/mzml/activation.rb +33 -0
- data/lib/ms/mzml/chromatogram.rb +29 -0
- data/lib/ms/mzml/chromatogram_list.rb +26 -0
- data/lib/ms/mzml/component.rb +21 -0
- data/lib/ms/mzml/contact.rb +23 -0
- data/lib/ms/mzml/cv.rb +46 -0
- data/lib/ms/mzml/data_array.rb +65 -0
- data/lib/ms/mzml/data_array_container_like.rb +57 -0
- data/lib/ms/mzml/data_processing.rb +27 -0
- data/lib/ms/mzml/file_content.rb +21 -0
- data/lib/ms/mzml/file_description.rb +47 -0
- data/lib/ms/mzml/instrument_configuration.rb +37 -0
- data/lib/ms/mzml/isolation_window.rb +21 -0
- data/lib/ms/mzml/list.rb +23 -0
- data/lib/ms/mzml/precursor.rb +42 -0
- data/lib/ms/mzml/processing_method.rb +24 -0
- data/lib/ms/mzml/product.rb +22 -0
- data/lib/ms/mzml/referenceable_param_group.rb +40 -0
- data/lib/ms/mzml/run.rb +54 -0
- data/lib/ms/mzml/sample.rb +27 -0
- data/lib/ms/mzml/scan.rb +44 -0
- data/lib/ms/mzml/scan_list.rb +33 -0
- data/lib/ms/mzml/scan_settings.rb +28 -0
- data/lib/ms/mzml/selected_ion.rb +18 -0
- data/lib/ms/mzml/software.rb +28 -0
- data/lib/ms/mzml/source_file.rb +48 -0
- data/lib/ms/mzml/spectrum.rb +91 -0
- data/lib/ms/mzml/spectrum_list.rb +42 -0
- data/lib/ms/mzml.rb +173 -6
- data/lib/ms/quant/qspec/protein_group_comparison.rb +3 -3
- data/lib/ms/quant/qspec.rb +4 -4
- data/lib/ms/spectrum.rb +137 -260
- data/lib/ms/spectrum_like.rb +133 -0
- data/lib/ms/user_param.rb +43 -0
- data/lib/mspire.rb +6 -0
- data/obo/ms.obo +670 -121
- data/obo/unit.obo +23 -1
- data/spec/ms/cv/param_spec.rb +33 -0
- data/spec/ms/mzml/cv_spec.rb +17 -0
- data/spec/ms/mzml/file_content_spec.rb +25 -0
- data/spec/ms/mzml/file_description_spec.rb +34 -0
- data/spec/ms/mzml/referenceable_param_group_spec.rb +33 -0
- data/spec/ms/mzml_spec.rb +65 -4
- data/spec/ms/user_param_spec.rb +51 -0
- data/spec/mspire_spec.rb +9 -0
- data/spec/testfiles/ms/mzml/mspire_simulated.noidx.check.mzML +81 -0
- metadata +57 -21
- data/lib/cv/description.rb +0 -19
- data/lib/ms/cv/description.rb +0 -44
- data/lib/msplat.rb +0 -2
- data/spec/ms/cv/description_spec.rb +0 -60
- data/spec/msplat_spec.rb +0 -24
data/lib/ms/mzml.rb
CHANGED
@@ -1,10 +1,26 @@
|
|
1
|
+
require 'mspire'
|
2
|
+
require 'builder'
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'io/bookmark'
|
3
5
|
require 'zlib'
|
4
6
|
require 'ms/mzml/index_list'
|
5
7
|
require 'ms/spectrum'
|
8
|
+
require 'ms/mzml/file_description'
|
9
|
+
require 'ms/mzml/software'
|
10
|
+
require 'ms/mzml/scan_list'
|
11
|
+
require 'ms/mzml/scan'
|
12
|
+
require 'ms/mzml/run'
|
13
|
+
require 'ms/mzml/spectrum_list'
|
14
|
+
require 'ms/mzml/chromatogram_list'
|
15
|
+
require 'ms/mzml/instrument_configuration'
|
16
|
+
require 'ms/mzml/data_processing'
|
17
|
+
require 'ms/mzml/referenceable_param_group'
|
18
|
+
require 'ms/mzml/cv'
|
19
|
+
require 'ms/mzml/sample'
|
6
20
|
|
7
21
|
module MS
|
22
|
+
# Reading an mzml file:
|
23
|
+
#
|
8
24
|
# MS::Mzml.open("somefile.mzML") do |mzml|
|
9
25
|
# mzml.each do |spectrum|
|
10
26
|
# scan = spectrum.scan
|
@@ -15,24 +31,131 @@ module MS
|
|
15
31
|
# end
|
16
32
|
# end
|
17
33
|
# end
|
34
|
+
#
|
35
|
+
# Note that the mzml object supports random spectrum access (even if the
|
36
|
+
# mzml was not indexed):
|
37
|
+
#
|
38
|
+
# mzml[22] # retrieve spectrum at index 22
|
39
|
+
#
|
40
|
+
# Writing an mzml file from scratch:
|
41
|
+
#
|
42
|
+
# spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000127', ['MS:1000511', 1]]) do |spec|
|
43
|
+
# spec.data_arrays = [[1,2,3], [4,5,6]]
|
44
|
+
# spec.scan_list = MS::Mzml::ScanList.new do |sl|
|
45
|
+
# scan = MS::Mzml::Scan.new do |scan|
|
46
|
+
# # retention time of 40 seconds
|
47
|
+
# scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
|
48
|
+
# end
|
49
|
+
# sl << scan
|
50
|
+
# end
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# mzml = MS::Mzml.new do |mzml|
|
54
|
+
# mzml.id = 'the_little_example'
|
55
|
+
# mzml.cvs = MS::Mzml::CV::DEFAULT_CVS
|
56
|
+
# mzml.file_description = MS::Mzml::FileDescription.new do |fd|
|
57
|
+
# fd.file_content = MS::Mzml::FileContent.new
|
58
|
+
# fd.source_files << MS::Mzml::SourceFile.new
|
59
|
+
# end
|
60
|
+
# default_instrument_config = MS::Mzml::InstrumentConfiguration.new("IC",[], params: ['MS:1000031'])
|
61
|
+
# mzml.instrument_configurations << default_instrument_config
|
62
|
+
# software = MS::Mzml::Software.new
|
63
|
+
# mzml.software_list << software
|
64
|
+
# default_data_processing = MS::Mzml::DataProcessing.new("did_nothing")
|
65
|
+
# mzml.data_processing_list << default_data_processing
|
66
|
+
# mzml.run = MS::Mzml::Run.new("little_run", default_instrument_config) do |run|
|
67
|
+
# spectrum_list = MS::Mzml::SpectrumList.new(default_data_processing)
|
68
|
+
# spectrum_list.push(spec1)
|
69
|
+
# run.spectrum_list = spectrum_list
|
70
|
+
# end
|
71
|
+
# end
|
18
72
|
class Mzml
|
73
|
+
|
74
|
+
module Default
|
75
|
+
NAMESPACE = {
|
76
|
+
:xmlns => "http://psi.hupo.org/ms/mzml",
|
77
|
+
"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
|
78
|
+
"xmlns:xsd" => "http://www.w3.org/2001/XMLSchema",
|
79
|
+
}
|
80
|
+
|
81
|
+
VERSION = '1.1.0'
|
82
|
+
end
|
83
|
+
|
84
|
+
###############################################
|
85
|
+
# ATTRIBUTES
|
86
|
+
###############################################
|
87
|
+
|
88
|
+
# (optional) an id for accessing from external files
|
89
|
+
attr_accessor :id
|
90
|
+
|
91
|
+
# (required) the Mzml document version
|
92
|
+
attr_accessor :version
|
93
|
+
|
94
|
+
# (optional) e.g. a PRIDE accession number
|
95
|
+
attr_accessor :accession
|
96
|
+
|
97
|
+
###############################################
|
98
|
+
# SUBELEMENTS
|
99
|
+
###############################################
|
100
|
+
|
101
|
+
# (required) an array of MS::Mzml::CV objects
|
102
|
+
attr_accessor :cvs
|
103
|
+
|
104
|
+
# (required) an MS::Mzml::FileDescription
|
105
|
+
attr_accessor :file_description
|
106
|
+
|
107
|
+
# (optional) an array of CV::ReferenceableParamGroup objects
|
108
|
+
attr_accessor :referenceable_param_groups
|
109
|
+
|
110
|
+
# (optional) an array of MS::Mzml::Sample objects
|
111
|
+
attr_accessor :samples
|
112
|
+
|
113
|
+
# (required) an array of MS::Mzml::Software objects
|
114
|
+
attr_accessor :software_list
|
115
|
+
|
116
|
+
# (optional) an array of MS::Mzml::ScanSettings objects
|
117
|
+
attr_accessor :scan_settings_list
|
118
|
+
|
119
|
+
# (required) an array of MS::Mzml::InstrumentConfiguration objects
|
120
|
+
attr_accessor :instrument_configurations
|
121
|
+
|
122
|
+
# (required) an array of MS::Mzml::DataProcessing objects
|
123
|
+
attr_accessor :data_processing_list
|
124
|
+
|
125
|
+
# (required) an MS::Mzml::Run object
|
126
|
+
attr_accessor :run
|
127
|
+
|
19
128
|
module Parser
|
20
129
|
NOBLANKS = ::Nokogiri::XML::ParseOptions::DEFAULT_XML | ::Nokogiri::XML::ParseOptions::NOBLANKS
|
21
130
|
end
|
22
131
|
include Enumerable
|
23
132
|
|
24
|
-
attr_accessor :filename
|
25
133
|
attr_accessor :io
|
26
134
|
attr_accessor :index_list
|
27
135
|
attr_accessor :encoding
|
28
136
|
|
137
|
+
# arg must be an IO object for automatic index and header parsing to
|
138
|
+
# occur. If arg is a hash, then attributes are set. In addition (or
|
139
|
+
# alternatively) a block may be given, which is called with self to set up the object.
|
140
|
+
#
|
29
141
|
# io must respond_to?(:size), giving the size of the io object in bytes
|
30
|
-
# which allows seeking.
|
142
|
+
# which allows seeking. get_index_list is called to get or create the
|
31
143
|
# index list.
|
32
|
-
def initialize(
|
33
|
-
|
34
|
-
|
35
|
-
|
144
|
+
def initialize(arg=nil, &block)
|
145
|
+
%w(cvs software_list instrument_configurations data_processing_list).each {|guy| self.send( guy + '=', [] ) }
|
146
|
+
|
147
|
+
case arg
|
148
|
+
when IO
|
149
|
+
@io = arg
|
150
|
+
@encoding = @io.bookmark(true) {|io| io.readline.match(/encoding=["'](.*?)["']/)[1] }
|
151
|
+
@index_list = get_index_list
|
152
|
+
# TODO: also read in 'header' info (everything until 'run')
|
153
|
+
when Hash
|
154
|
+
arg.each {|k,v| self.send("#{k}=", v) }
|
155
|
+
end
|
156
|
+
if block
|
157
|
+
block.call(self)
|
158
|
+
end
|
36
159
|
end
|
37
160
|
|
38
161
|
class << self
|
@@ -191,6 +314,50 @@ module MS
|
|
191
314
|
read_index_list || create_index_list
|
192
315
|
end
|
193
316
|
|
317
|
+
# Because mzml files are often very large, we try to avoid storing the
|
318
|
+
# entire object tree in memory before writing.
|
319
|
+
#
|
320
|
+
# takes a filename and uses builder to write to it
|
321
|
+
# if no filename is given, returns a string
|
322
|
+
def to_xml(filename=nil)
|
323
|
+
# TODO: support indexed mzml files
|
324
|
+
io = filename ? File.open(filename, 'w') : StringIO.new
|
325
|
+
xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
|
326
|
+
xml.instruct!
|
327
|
+
|
328
|
+
mzml_atts = Default::NAMESPACE.dup
|
329
|
+
mzml_atts[:version] = @version || Default::VERSION
|
330
|
+
mzml_atts[:accession] = @accession if @accession
|
331
|
+
mzml_atts[:id] = @id if @id
|
332
|
+
|
333
|
+
xml.mzML(mzml_atts) do |mzml_n|
|
334
|
+
# the 'if' statements capture whether or not the list is required
|
335
|
+
raise "#{self.class}#cvs must have > 0 MS::Mzml::CV objects" unless @cvs.size > 0
|
336
|
+
MS::Mzml::CV.list_xml(@cvs, mzml_n)
|
337
|
+
@file_description.to_xml(mzml_n)
|
338
|
+
if @referenceable_param_groups
|
339
|
+
MS::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
|
340
|
+
end
|
341
|
+
if @samples
|
342
|
+
MS::Mzml::Sample.list_xml(@samples, mzml_n)
|
343
|
+
end
|
344
|
+
MS::Mzml::Software.list_xml(@software_list, mzml_n)
|
345
|
+
if @scan_settings_list && @scan_settings_list.size > 0
|
346
|
+
MS::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
|
347
|
+
end
|
348
|
+
icl = MS::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
|
349
|
+
MS::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
|
350
|
+
@run.to_xml(mzml_n)
|
351
|
+
end
|
352
|
+
|
353
|
+
if filename
|
354
|
+
io.close
|
355
|
+
self
|
356
|
+
else
|
357
|
+
io.string
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
194
361
|
class ScanNumbersNotUnique < Exception
|
195
362
|
end
|
196
363
|
class ScanNumbersNotFound < Exception
|
@@ -1,14 +1,14 @@
|
|
1
1
|
require 'ms/quant/protein_group_comparison'
|
2
2
|
|
3
|
-
module
|
3
|
+
module MS
|
4
4
|
module Quant
|
5
5
|
module ProteinGroupComparison
|
6
6
|
end
|
7
7
|
end
|
8
8
|
end
|
9
9
|
|
10
|
-
class
|
11
|
-
include
|
10
|
+
class MS::Quant::ProteinGroupComparison::Qspec
|
11
|
+
include MS::Quant::ProteinGroupComparison
|
12
12
|
|
13
13
|
attr_accessor :qspec_results_struct
|
14
14
|
|
data/lib/ms/quant/qspec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module MS ; end
|
2
|
+
module MS::Quant ; end
|
3
3
|
|
4
|
-
class
|
4
|
+
class MS::Quant::Qspec
|
5
5
|
|
6
6
|
# personal communication with Hyungwon Choi: "We typically use nburn=2000,
|
7
7
|
# niter=10000, which is quite sufficient to guarantee the reproducibility of
|
@@ -62,7 +62,7 @@ class Ms::Quant::Qspec
|
|
62
62
|
|
63
63
|
# writes a qspec formatted file to filename
|
64
64
|
def write(filename)
|
65
|
-
ints =
|
65
|
+
ints = MS::Quant::Qspec.conditions_to_ints(conditions)
|
66
66
|
header_cats = INIT_HEADER + ints
|
67
67
|
rows = @protname_length_pairs.map {|pair| pair.map.to_a }
|
68
68
|
@condition_to_count_array.each do |cond,counts|
|
data/lib/ms/spectrum.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'ms/spectrum_like'
|
1
2
|
require 'bsearch'
|
2
3
|
require 'bin'
|
3
4
|
require 'ms/peak'
|
@@ -6,7 +7,7 @@ module MS
|
|
6
7
|
# note that a point is an [m/z, intensity] doublet.
|
7
8
|
# A peak is considered a related string of points
|
8
9
|
class Spectrum
|
9
|
-
include
|
10
|
+
include MS::SpectrumLike
|
10
11
|
|
11
12
|
DEFAULT_MERGE = {
|
12
13
|
:bin_width => 5,
|
@@ -16,284 +17,160 @@ module MS
|
|
16
17
|
:split => :share
|
17
18
|
}
|
18
19
|
|
19
|
-
|
20
|
-
# spectra are centroided (just checks the first one and assumes the others
|
21
|
-
# are the same) then it will bin the points (bin width determined by
|
22
|
-
# opts[:resolution]) and then segment according to monotonicity (sharing
|
23
|
-
# intensity between abutting points). The final m/z is the weighted
|
24
|
-
# averaged of all the m/z's in each peak. Valid opts (with default listed
|
25
|
-
# first):
|
26
|
-
#
|
27
|
-
# :bin_width => 5
|
28
|
-
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
29
|
-
# :bins => array of Bin objects for custom bins (overides other bin options)
|
30
|
-
# :normalize => false if true, divides total intensity by
|
31
|
-
# number of spectra
|
32
|
-
# :return_data => false returns a parallel array containing
|
33
|
-
# the peaks associated with each returned point
|
34
|
-
# :split => :share | :greedy_y see MS::Peak#split
|
35
|
-
#
|
36
|
-
# The binning algorithm is the fastest possible algorithm that would allow
|
37
|
-
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
38
|
-
def self.merge(spectra, opts={})
|
39
|
-
opt = DEFAULT_MERGE.merge(opts)
|
40
|
-
(spectrum, returned_data) =
|
41
|
-
if spectra.first.centroided?
|
42
|
-
# find the min and max across all spectra
|
43
|
-
first_mzs = spectra.first.mzs
|
44
|
-
min = first_mzs.first ; max = first_mzs.last
|
45
|
-
spectra.each do |spectrum|
|
46
|
-
mzs = spectrum.mzs
|
47
|
-
min = mzs.first if mzs.first < min
|
48
|
-
max = mzs.last if mzs.last > max
|
49
|
-
end
|
50
|
-
|
51
|
-
# Create Bin objects
|
52
|
-
bins =
|
53
|
-
if opt[:bins]
|
54
|
-
opt[:bins]
|
55
|
-
else
|
56
|
-
divisions = []
|
57
|
-
bin_width = opt[:bin_width]
|
58
|
-
use_ppm = (opt[:bin_unit] == :ppm)
|
59
|
-
current_mz = min
|
60
|
-
loop do
|
61
|
-
if current_mz >= max
|
62
|
-
divisions << max
|
63
|
-
break
|
64
|
-
else
|
65
|
-
divisions << current_mz
|
66
|
-
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
67
|
-
end
|
68
|
-
end
|
69
|
-
# make each bin exclusive so there is no overlap
|
70
|
-
bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
|
71
|
-
# make the last bin *inclusive* of the terminating value
|
72
|
-
bins[-1] = Bin.new(bins.last.begin, bins.last.end)
|
73
|
-
bins
|
74
|
-
end
|
75
|
-
|
76
|
-
spectra.each do |spectrum|
|
77
|
-
Bin.bin(bins, spectrum.points, &:first)
|
78
|
-
end
|
79
|
-
|
80
|
-
pseudo_points = bins.map do |bin|
|
81
|
-
#int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
|
82
|
-
[bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
|
83
|
-
end
|
20
|
+
class << self
|
84
21
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
22
|
+
def from_points(ar_of_doublets)
|
23
|
+
_mzs = []
|
24
|
+
_ints = []
|
25
|
+
ar_of_doublets.each do |mz, int|
|
26
|
+
_mzs << mz
|
27
|
+
_ints << int
|
28
|
+
end
|
29
|
+
self.new([_mzs, _ints])
|
30
|
+
end
|
93
31
|
|
94
|
-
#File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
|
95
|
-
#abort 'here'
|
96
32
|
|
33
|
+
# returns a new spectrum which has been merged with the others. If the
|
34
|
+
# spectra are centroided (just checks the first one and assumes the others
|
35
|
+
# are the same) then it will bin the points (bin width determined by
|
36
|
+
# opts[:bin_width] and opts[:bin_unit]) and then segment according to monotonicity (sharing
|
37
|
+
# intensity between abutting points). The final m/z is the weighted
|
38
|
+
# average of all the m/z's in each peak. Valid opts (with default listed
|
39
|
+
# first):
|
40
|
+
#
|
41
|
+
# :bin_width => 5
|
42
|
+
# :bin_unit => :ppm | :amu interpret bin_width as ppm or amu
|
43
|
+
# :bins => array of Bin objects for custom bins (overrides other bin options)
|
44
|
+
# :normalize => false if true, divides total intensity by
|
45
|
+
# number of spectra
|
46
|
+
# :return_data => false returns a parallel array containing
|
47
|
+
# the peaks associated with each returned point
|
48
|
+
# :split => :share | :greedy_y see MS::Peak#split
|
49
|
+
#
|
50
|
+
# The binning algorithm is the fastest possible algorithm that would allow
|
51
|
+
# for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
|
52
|
+
def merge(spectra, opts={})
|
53
|
+
opt = DEFAULT_MERGE.merge(opts)
|
54
|
+
(spectrum, returned_data) =
|
55
|
+
if spectra.first.centroided?
|
56
|
+
# find the min and max across all spectra
|
57
|
+
first_mzs = spectra.first.mzs
|
58
|
+
min = first_mzs.first ; max = first_mzs.last
|
59
|
+
spectra.each do |spectrum|
|
60
|
+
mzs = spectrum.mzs
|
61
|
+
min = mzs.first if mzs.first < min
|
62
|
+
max = mzs.last if mzs.last > max
|
63
|
+
end
|
97
64
|
|
98
|
-
|
65
|
+
# Create Bin objects
|
66
|
+
bins =
|
67
|
+
if opt[:bins]
|
68
|
+
opt[:bins]
|
69
|
+
else
|
70
|
+
divisions = []
|
71
|
+
bin_width = opt[:bin_width]
|
72
|
+
use_ppm = (opt[:bin_unit] == :ppm)
|
73
|
+
current_mz = min
|
74
|
+
loop do
|
75
|
+
if current_mz >= max
|
76
|
+
divisions << max
|
77
|
+
break
|
78
|
+
else
|
79
|
+
divisions << current_mz
|
80
|
+
current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
|
81
|
+
end
|
82
|
+
end
|
83
|
+
# make each bin exclusive so there is no overlap
|
84
|
+
bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
|
85
|
+
# make the last bin *inclusive* of the terminating value
|
86
|
+
bins[-1] = Bin.new(bins.last.begin, bins.last.end)
|
87
|
+
bins
|
88
|
+
end
|
99
89
|
|
100
|
-
|
101
|
-
|
90
|
+
spectra.each do |spectrum|
|
91
|
+
Bin.bin(bins, spectrum.points, &:first)
|
92
|
+
end
|
102
93
|
|
103
|
-
|
104
|
-
|
105
|
-
|
94
|
+
pseudo_points = bins.map do |bin|
|
95
|
+
#int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3) # <- just for info:
|
96
|
+
[bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
|
97
|
+
end
|
106
98
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
99
|
+
#p_mzs = []
|
100
|
+
#p_ints = []
|
101
|
+
#p_num_points = []
|
102
|
+
#pseudo_points.each do |psp|
|
103
|
+
# p_mzs << ((psp.first.begin + psp.first.end)/2)
|
104
|
+
# p_ints << psp.last
|
105
|
+
# p_num_points << psp.first.data.size
|
106
|
+
#end
|
107
|
+
|
108
|
+
#File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
|
109
|
+
#abort 'here'
|
110
|
+
|
111
|
+
|
112
|
+
peaks = MS::Peak.new(pseudo_points).split(opt[:split])
|
113
|
+
|
114
|
+
return_data = []
|
115
|
+
_mzs = [] ; _ints = []
|
116
|
+
|
117
|
+
#p peaks[97]
|
118
|
+
#puts "HIYA"
|
119
|
+
#abort 'here'
|
120
|
+
|
121
|
+
peaks.each_with_index do |peak,i|
|
122
|
+
#peaks.each do |peak|
|
123
|
+
tot_intensity = peak.map(&:last).reduce(:+)
|
124
|
+
return_data_per_peak = [] if opt[:return_data]
|
125
|
+
weighted_mz = 0.0
|
126
|
+
peak.each do |point|
|
127
|
+
pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
|
128
|
+
post_scaled_intensity = point[1]
|
129
|
+
# some peaks may have been shared. In this case the intensity
|
130
|
+
# for that peak was downweighted. However, the actual data
|
131
|
+
# composing that peak is not altered when the intensity is
|
132
|
+
# shared. So, to calculate a proper weighted avg we need to
|
133
|
+
# downweight the intensity of any data point found within a bin
|
134
|
+
# whose intensity was scaled.
|
135
|
+
correction_factor =
|
136
|
+
if pre_scaled_intensity != post_scaled_intensity
|
137
|
+
post_scaled_intensity / pre_scaled_intensity
|
138
|
+
else
|
139
|
+
1.0
|
140
|
+
end
|
141
|
+
|
142
|
+
return_data_per_peak.push(*point[0].data) if opt[:return_data]
|
143
|
+
|
144
|
+
point[0].data.each do |lil_point|
|
145
|
+
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
126
146
|
end
|
127
|
-
|
128
|
-
return_data_per_peak.push(*point[0].data) if opt[:return_data]
|
129
|
-
|
130
|
-
point[0].data.each do |lil_point|
|
131
|
-
weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
|
132
147
|
end
|
148
|
+
return_data << return_data_per_peak if opt[:return_data]
|
149
|
+
_mzs << weighted_mz
|
150
|
+
_ints << tot_intensity
|
133
151
|
end
|
134
|
-
|
135
|
-
|
136
|
-
|
152
|
+
[Spectrum.new([_mzs, _ints]), return_data]
|
153
|
+
else
|
154
|
+
raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
|
137
155
|
end
|
138
|
-
[Spectrum.new([_mzs, _ints]), return_data]
|
139
|
-
else
|
140
|
-
raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
|
141
|
-
end
|
142
|
-
|
143
|
-
if opt[:normalize]
|
144
|
-
sz = spectra.size
|
145
|
-
spectrum.data[1].map! {|v| v.to_f / sz }
|
146
|
-
end
|
147
|
-
if opt[:return_data]
|
148
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
|
149
|
-
[spectrum, return_data]
|
150
|
-
else
|
151
|
-
$stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
|
152
|
-
spectrum
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
|
157
|
-
# boolean for if the spectrum represents centroided data or not
|
158
|
-
attr_accessor :centroided
|
159
|
-
|
160
|
-
def centroided?() centroided end
|
161
|
-
|
162
|
-
# The underlying data store. methods are implemented so that data[0] is
|
163
|
-
# the m/z's and data[1] is intensities
|
164
|
-
attr_reader :data
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
# data takes an array: [mzs, intensities]
|
170
|
-
# @return [MS::Spectrum]
|
171
|
-
# @param [Array] data two element array of mzs and intensities
|
172
|
-
def initialize(data, centroided=true)
|
173
|
-
@data = data
|
174
|
-
@centroided = centroided
|
175
|
-
end
|
176
|
-
|
177
|
-
def self.from_points(ar_of_doublets)
|
178
|
-
_mzs = []
|
179
|
-
_ints = []
|
180
|
-
ar_of_doublets.each do |mz, int|
|
181
|
-
_mzs << mz
|
182
|
-
_ints << int
|
183
|
-
end
|
184
|
-
self.new([_mzs, _ints])
|
185
|
-
end
|
186
|
-
|
187
|
-
# found by querying the size of the data store. This should almost always
|
188
|
-
# be 2 (m/z and intensities)
|
189
|
-
def size
|
190
|
-
@data.size
|
191
|
-
end
|
192
|
-
|
193
|
-
def ==(other)
|
194
|
-
mzs == other.mzs && intensities == other.intensities
|
195
|
-
end
|
196
|
-
|
197
|
-
# An array of the mz data.
|
198
|
-
def mzs
|
199
|
-
@data[0]
|
200
|
-
end
|
201
|
-
|
202
|
-
# An array of the intensities data, corresponding to mzs.
|
203
|
-
def intensities
|
204
|
-
@data[1]
|
205
|
-
end
|
206
|
-
|
207
|
-
def mzs_and_intensities
|
208
|
-
[@data[0], @data[1]]
|
209
|
-
end
|
210
|
-
|
211
|
-
# retrieve an m/z and intensity doublet at that index
|
212
|
-
def [](array_index)
|
213
|
-
[@data[0][array_index], @data[1][array_index]]
|
214
|
-
end
|
215
|
-
|
216
|
-
# yields(mz, inten) across the spectrum, or array of doublets if no block
|
217
|
-
def points(&block)
|
218
|
-
@data[0].zip(@data[1], &block)
|
219
|
-
end
|
220
|
-
|
221
|
-
alias_method :each, :points
|
222
|
-
alias_method :each_point, :points
|
223
|
-
|
224
|
-
# if the mzs and intensities are the same then the spectra are considered
|
225
|
-
# equal
|
226
|
-
def ==(other)
|
227
|
-
mzs == other.mzs && intensities == other.intensities
|
228
|
-
end
|
229
|
-
|
230
|
-
# returns a new spectrum whose intensities have been normalized by the tic
|
231
|
-
# of another given value
|
232
|
-
def normalize(norm_by=:tic)
|
233
|
-
norm_by = tic if norm_by == :tic
|
234
|
-
MS::Spectrum.new([self.mzs, self.intensities.map {|v| v / norm_by }])
|
235
|
-
end
|
236
|
-
|
237
|
-
def tic
|
238
|
-
self.intensities.reduce(:+)
|
239
|
-
end
|
240
|
-
|
241
|
-
# ensures that the m/z values are monotonically ascending (some
|
242
|
-
# instruments are bad about this)
|
243
|
-
# returns self
|
244
|
-
def sort!
|
245
|
-
_points = points.to_a
|
246
|
-
_points.sort!
|
247
|
-
_points.each_with_index {|(mz,int), i| @data[0][i] = mz ; @data[1][i] = int }
|
248
|
-
self
|
249
|
-
end
|
250
|
-
|
251
|
-
# returns the m/z that is closest to the value, favoring the lower m/z in
|
252
|
-
# the case of a tie. Uses a binary search.
|
253
|
-
def find_nearest(val)
|
254
|
-
mzs[find_nearest_index(val)]
|
255
|
-
end
|
256
|
-
|
257
|
-
# same as find_nearest but returns the index of the point
|
258
|
-
def find_nearest_index(val)
|
259
|
-
find_all_nearest_index(val).first
|
260
|
-
end
|
261
156
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
[
|
267
|
-
|
268
|
-
|
269
|
-
if index == 0
|
270
|
-
[index]
|
157
|
+
if opt[:normalize]
|
158
|
+
sz = spectra.size
|
159
|
+
spectrum.data[1].map! {|v| v.to_f / sz }
|
160
|
+
end
|
161
|
+
if opt[:return_data]
|
162
|
+
$stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
|
163
|
+
[spectrum, return_data]
|
271
164
|
else
|
272
|
-
|
273
|
-
|
274
|
-
[index-1]
|
275
|
-
when 0
|
276
|
-
[index-1, index]
|
277
|
-
when 1
|
278
|
-
[index]
|
279
|
-
end
|
165
|
+
$stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
|
166
|
+
spectrum
|
280
167
|
end
|
281
168
|
end
|
282
|
-
end
|
283
|
-
|
284
|
-
def find_all_nearest(val)
|
285
|
-
find_all_nearest_index(val).map {|i| mzs[i] }
|
286
|
-
end
|
287
169
|
|
288
|
-
# uses MS::Spectrum.merge
|
289
|
-
def merge(other_spectra, opts={})
|
290
|
-
MS::Spectrum.merge([self, *other_spectra], opts)
|
291
170
|
end
|
292
|
-
|
293
|
-
|
294
171
|
end
|
295
172
|
end
|
296
173
|
|
297
174
|
|
298
|
-
|
175
|
+
|
299
176
|
|