mspire-lipidomics 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/LICENSE +21 -0
- data/README.rdoc +18 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/lipidomic-search.rb +199 -0
- data/lib/mspire/lipid/ion/fragment.rb +68 -0
- data/lib/mspire/lipid/ion.rb +57 -0
- data/lib/mspire/lipid/modification.rb +125 -0
- data/lib/mspire/lipid/search/bin.rb +79 -0
- data/lib/mspire/lipid/search/db_isobar_group.rb +20 -0
- data/lib/mspire/lipid/search/hit.rb +79 -0
- data/lib/mspire/lipid/search/probability_distribution.rb +50 -0
- data/lib/mspire/lipid/search/query.rb +23 -0
- data/lib/mspire/lipid/search.rb +205 -0
- data/lib/mspire/lipid.rb +19 -0
- data/lib/mspire/lipid_maps.rb +87 -0
- data/mspire-lipidomics.gemspec +85 -0
- data/scratch/OBConversion_methods.txt +47 -0
- data/scratch/atom_methods.txt +145 -0
- data/scratch/bond_methods.txt +867 -0
- data/scratch/mol_methods.txt +183 -0
- data/scratch/split_molecules.rb +93 -0
- data/script/find_nearest_lipid.rb +114 -0
- data/spec/mspire/lipid/ion_spec.rb +83 -0
- data/spec/mspire/lipid/modification_spec.rb +41 -0
- data/spec/mspire/lipid/search_spec.rb +79 -0
- data/spec/mspire/lipid_maps_spec.rb +64 -0
- data/spec/mspire/lipid_spec.rb +16 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/testfiles/lipidmaps_download.tsv +11 -0
- data/spec/testfiles/lipidmaps_programmatic_short.tsv +32 -0
- data/spec/testfiles/lipidmaps_sd_download.tsv +11 -0
- metadata +162 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2012 Brigham Young University
|
2
|
+
authored by: John T. Prince
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
= mspire-lipidomics
|
2
|
+
|
3
|
+
Identify and quantify shotgun lipidomics samples.
|
4
|
+
|
5
|
+
== Examples
|
6
|
+
|
7
|
+
Use the commandline: lipidomic-search.rb
|
8
|
+
|
9
|
+
== Install
|
10
|
+
|
11
|
+
gem install mspire-lipidomics
|
12
|
+
|
13
|
+
If you want to do fragmentation prediction or work with molecules you need to
|
14
|
+
install the rubabel gem (which requires openbabel with ruby bindings).
|
15
|
+
|
16
|
+
== License
|
17
|
+
|
18
|
+
MIT (See LICENSE)
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8

require 'rubygems'
require 'rake'

require 'jeweler'
# Packaging/release tasks. The block configures the gem specification that
# jeweler builds.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mspire-lipidomics"
  gem.homepage = "http://github.com/princelab/mspire-lipidomics"
  gem.license = "MIT"
  gem.summary = %Q{mass spectrometry based lipidomics - especially shotgun lipidomics}
  gem.description = %Q{does lipidomics}
  gem.email = "jtprince@gmail.com"
  gem.authors = ["John T. Prince"]
  gem.add_dependency "mspire", ">= 0.7.8"
  gem.add_development_dependency "rubabel", ">= 0.1.0"
  gem.add_development_dependency "rspec", "~> 2.3.0"
  gem.add_development_dependency "jeweler", "~> 1.6.4"
  gem.add_development_dependency "rcov", ">= 0"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
# `rake spec`: run every *_spec.rb under spec/
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov`: same specs, with rcov enabled
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rdoc/task'
# `rake rdoc`: API docs for the README and everything under lib/
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in
  # the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  # use the gem's actual name (gem.name above), not the old "ms-" prefix
  rdoc.title = "mspire-lipidomics #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.4
|
@@ -0,0 +1,199 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'trollop'
# The library files of this gem (and of the mspire gem it depends on) live
# under "mspire/", not the older "ms/" prefix.
require 'mspire/mzml'
require 'mspire/lipid/search'
require 'mspire/lipid/ion'
require 'mspire/lipid/search/query'
require 'mspire/lipid_maps'
require 'mspire/error_rate/qvalue'

# The library namespace is Mspire; the body of this script still spells it
# MS::, so keep that spelling resolvable.
MS = Mspire unless defined?(MS)

# for html output: (just make the id clickable)
LIPIDMAPS_SEARCH = "http://www.lipidmaps.org/data/LMSDRecord.php?LMID="

# factor applied to a modification's massdiff to create a decoy modification
DECOY_MODULATOR = 0.8319

# default binning/search settings (also used as the command-line defaults)
DEFAULTS = {
  :bin_width => 5,
  :bin_unit => :ppm,
  :search_unit => :ppm,
}
|
21
|
+
|
22
|
+
# An Array-backed data point that also remembers which sample it belongs to.
# (Declared with `class`; the previous `def LipidPoint < Array` was a syntax
# error.)
class LipidPoint < Array
  # the sample this point is associated with
  attr_accessor :sample
end
|
25
|
+
|
26
|
+
# One input file together with the single spectrum produced by merging its
# MS1 scans.
class Sample
  # the file (path) this sample was read from
  attr_accessor :file
  # the merged spectrum
  attr_accessor :spectrum

  # @param file the mzML file to read (an Array of files is also accepted)
  # @param merge_opts [Hash] options overriding DEFAULTS; the merged hash is
  #   passed on to MS::Spectrum.merge
  def initialize(file, merge_opts={})
    @file = file
    # merge_ms1_spectra is a class method that iterates over a list of files
    @spectrum = self.class.merge_ms1_spectra(Array(file), DEFAULTS.merge(merge_opts))
  end

  # returns a single spectrum object
  #
  # Collects the ms level 1 spectra of every file (each one sorted in place
  # with sort!) into one flat list and merges them.
  def self.merge_ms1_spectra(files, opts)
    spectra = files.flat_map do |file|
      MS::Mzml.foreach(file).select {|spec| spec.ms_level == 1 }.map(&:sort!)
    end
    MS::Spectrum.merge(spectra, opts)
  end
end
|
42
|
+
|
43
|
+
# extension appended to each input file's basename to name its output
ext = ".lipidID.tsv"

# command-line definition
parser = Trollop::Parser.new do
  banner "usage: #{File.basename(__FILE__)} [OPTIONS] <lipidmaps>.tsv <file>.mzML ..."
  text "output: <file>#{ext} ..."
  text ""
  text "note that sometimes you get an error from R like this:"
  text "(`eval': voidEval failed: Packet[cmd=2130771970,len=<nil>, con='<nil>', status=error...)"
  text "just re-run it and it will work"
  text ""
  opt :bin_width, "width of the bins for merging", :default => DEFAULTS[:bin_width]
  opt :bin_unit, "units for binning (ppm or amu)", :default => DEFAULTS[:bin_unit].to_s
  opt :search_unit, "unit for searching nearest hit (ppm or amu)", :default => DEFAULTS[:search_unit].to_s
  opt :top_n_peaks, "the number of highest intensity peaks to query the DB with", :default => 1000
  opt :display_n, "the number of best hits to display", :default => 20
  text ""
  text "modifications (at least 1 charged mod is required):"
  opt :lithium, "search for lithium adducts"
  opt :ammonium, "search for ammonium adducts"
  opt :proton_gain, "search for proton gain"
  opt :proton_loss, "search for proton loss"
  opt :water_loss, "*all* mods are also considered with water loss"
  opt :decoy, "search with an equal number of decoy modifications"
  opt :verbose, "talk about it"
end

# Parser#parse signals --help and bad options by raising; let Trollop turn
# those into the usual usage/error output instead of a backtrace.
opts = Trollop.with_standard_exception_handling(parser) { parser.parse(ARGV) }
opts[:bin_unit] = opts[:bin_unit].to_sym
opts[:search_unit] = opts[:search_unit].to_sym

# need the lipidmaps file plus at least one data file
if ARGV.size < 2
  parser.educate
  exit
end

CHARGED_MODS = [:lithium, :ammonium, :proton_gain, :proton_loss]

unless CHARGED_MODS.any? {|key| opts[key] }
  puts "*" * 78
  puts "ArgumentError: need at least one charged mod!"
  puts "*" * 78
  parser.educate
  # this is an error path, so do not report success to the caller
  exit 1
end

(lipidmaps, *files) = ARGV

$VERBOSE = opts[:verbose]

MSLM = MS::Lipid::Modification

mods = {
  proton_gain: MSLM.new(:proton),
  water_loss: MSLM.new(:water, :loss => true),
  lithium: MSLM.new(:lithium),
  ammonium: MSLM.new(:ammonium),
  # :loss => true already flips the proton's +1 charge to -1; passing
  # :charge => -1 as well would be negated back to +1
  proton_loss: MSLM.new(:proton, :loss => true)
}

lipids = MS::LipidMaps.parse_file(lipidmaps)

# one ion per lipid per requested charged mod (plus a water-loss variant of
# each when --water-loss is given)
ions = []
lipids.each do |lipid|
  CHARGED_MODS.each do |key|
    next unless opts[key]
    ions << MS::Lipid::Ion.new(lipid, [mods[key]])
    if opts[:water_loss]
      ions << MS::Lipid::Ion.new(lipid, [mods[key], mods[:water_loss]])
    end
  end
end

searcher = MS::Lipid::Search.new(ions, :ppm => (opts[:search_unit] == :ppm))

if opts[:decoy]
  # assumes a mod group that is either the mod or a mod and water loss
  decoy_ions = ions.map do |ion|
    # modify the first mod and leave the second untouched (if any)
    mod_group = ion.modifications
    fake_mod = mod_group.first.dup
    fake_mod.massdiff *= DECOY_MODULATOR
    fake_mod.formula = "FAKE#{mod_group.first.formula}(#{fake_mod.massdiff})"
    fake_mod.name = "fake_#{mod_group.first.name}".to_sym
    new_mod_group = [fake_mod, *mod_group[1..-1]]
    MS::Lipid::Ion.new(ion.lipid, new_mod_group)
  end
  decoy_searcher = MS::Lipid::Search.new(decoy_ions, :ppm => (opts[:search_unit] == :ppm))
end

files.each do |file|
  base = file.chomp(File.extname(file))
  puts "processing file: #{file}" if $VERBOSE
  sample = Sample.new(file, opts)

  num_points = sample.spectrum.mzs.size
  puts "#{num_points} merged peaks in #{file}" if $VERBOSE

  # keep only the top_n_peaks most intense points, then sort them again
  highest_points = sample.spectrum.points.sort_by(&:last).reverse[0,opts[:top_n_peaks]].sort

  sample.spectrum = MS::Spectrum.from_points( highest_points )

  queries = sample.spectrum.mzs.each_with_index.map {|mz,index| MS::Lipid::Search::Query.new(mz, index) }
  hit_groups = searcher.search(queries, :return_order => :sorted)
  if opts[:decoy]
    decoy_hit_groups = decoy_searcher.search(queries, :return_order => :sorted)
    hit_group_qvalue_pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(hit_groups, decoy_hit_groups, :monotonic => true, &:pvalue)
    hit_group_qvalue_pairs.each do |hit_group, qval|
      hit_group.first.decoy_qvalue = qval
    end
  end

  # all info is relative to the hit_group
  # (column name => proc producing that column's value for a hit group)
  info = {
    decoy_qvalue: :decoy_qvalue.to_proc,
    qvalue: :qvalue.to_proc,
    pvalue: :pvalue.to_proc,
    observed_mz: :observed_mz.to_proc,
    theoretical_mz: :theoretical_mz.to_proc,
    delta: :delta.to_proc,
    ppm: :ppm.to_proc,
    # a hit group may hold a single hit; leave the column empty in that case
    hit2_ppm: proc {|hg| (second_hit = hg[1]) && second_hit.ppm },
    first_isobar_name: proc {|hg| (lipid=hg.first.db_isobar_group.first.lipid).common_name || lipid.systematic_name },
    num_isobars: proc {|hg| hg.first.db_isobar_group.size },
    # "<lm_id>:<signed charged formulas>" for every ion of the best hit's
    # isobar group, space separated
    ions: proc {|hg|
      hg.first.db_isobar_group.map do |ion|
        mod_string = ion.modifications.map do |mod|
          (mod.gain? ? '+' : '-') + "(#{mod.charged_formula})"
        end.join
        [ion.lipid.lm_id, mod_string].join(":")
      end.join(' ')
    }
  }

  output = base + ext
  puts "writing to #{output}" if $VERBOSE
  File.open(output, 'w') do |out|
    out.puts info.keys.join("\t")
    hit_groups[0,opts[:display_n]].each do |hit_group|
      out.puts info.values.map {|prc| prc.call(hit_group) }.join("\t")
    end
  end

  if opts[:decoy]
    decoy_output = base + '.decoy' + ext
    File.open(decoy_output, 'w') do |dout|
      # decoy hits carry no q-values, so drop those columns
      decoy_info = info.dup
      [:qvalue, :decoy_qvalue].each {|key| decoy_info.delete(key) }
      dout.puts decoy_info.keys.join("\t")
      decoy_hit_groups[0,opts[:display_n]].each do |hit_group|
        dout.puts decoy_info.values.map {|prc| prc.call(hit_group) }.join("\t")
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
|
2
|
+
module Mspire
  class Lipid

    # Maps a carbon-chain name prefix to its number of carbons.
    # goes from 1 to 99 (1-29 are listed, 30-99 are generated below)
    CHAIN_PREFIXES = {
      'meth' => 1,
      'eth' => 2,
      'prop' => 3,
      'but' => 4,
      'pent' => 5,
      'hex' => 6,
      'hept' => 7,
      'oct' => 8,
      'non' => 9,
      'dec' => 10,
      'undec' => 11,
      'dodec' => 12,
      'tridec' => 13,
      'tetradec' => 14,
      'pentadec' => 15,
      'hexadec' => 16,
      'heptadec' => 17,
      'octadec' => 18,
      'nonadec' => 19,
      'eicos' => 20,
      'heneicos' => 21,
      'docos' => 22,
      'tricos' => 23,
      'tetracos' => 24,
      'pentacos' => 25,
      'hexacos' => 26,
      'heptacos' => 27,
      'octacos' => 28,
      'nonacos' => 29
    }

    # multiplying prefixes that are regular from 30 upwards
    consistent = {
      0 => '',
      1 => 'hen',
      2 => 'do',
      3 => 'tri',
      4 => 'tetra',
      5 => 'penta',
      6 => 'hexa',
      7 => 'hepta',
      8 => 'octa',
      9 => 'nona',
    }

    # key = <ones prefix><tens stem>"cont", e.g. "hen" + "tetra" + "cont" => 41
    (3..9).each do |tens_place|
      # the thirties are the one irregular tens stem: "triacont", not "tricont"
      tens_stem = (tens_place == 3) ? 'tria' : consistent[tens_place]
      (0..9).each do |ones_place|
        key = consistent[ones_place] + tens_stem + "cont"
        CHAIN_PREFIXES[key] = 10*tens_place + ones_place
      end
    end

    class Ion
      module Fragment
        # predicts the MS/MS fragments for this ion
        # (not implemented yet: the body is empty, so this returns nil)
        def predict_fragment_mzs
        end
      end

      include Fragment
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'mspire/lipid/ion/fragment'
|
2
|
+
|
3
|
+
module Mspire
  class Lipid
    # a lipid with modifications (typically the mods give it a charge so that
    # it can be seen in the mass spec)
    class Ion
      # an Mspire::Lipid object
      attr_accessor :lipid
      # the modifications applied to the lipid (an Array of objects responding
      # to #charge, #massdiff and #formula)
      attr_accessor :modifications

      # @param lipid the unmodified lipid
      # @param mods [Array] the modifications carried by this ion
      def initialize(lipid, mods=[])
        @lipid = lipid
        @modifications = mods
        @mz = nil
      end

      # the net charge: the sum of the charges of all modifications (0 when
      # there are none)
      def charge
        @modifications.inject(0) {|z, mod| z + mod.charge }
      end

      # the lipid's formula with every modification's formula added to it
      # NOTE(review): loss modifications are added as well, not subtracted --
      # confirm that is intended.
      def formula
        formula = lipid.formula.dup
        modifications.each do |mod|
          formula += mod.formula
        end
        formula
      end

      # the m/z of the ion: (lipid mass + all massdiffs) / |net charge|.
      # Returns nil when the net charge is zero. A non-nil value is cached.
      def mz
        return @mz if @mz
        mass = @lipid.mass
        z = 0
        @modifications.each do |mod|
          mass += mod.massdiff
          z += mod.charge
        end
        # m/z is reported as a positive value regardless of the charge's sign
        @mz = (z == 0) ? nil : (mass / z).abs
      end

      def inspect
        "<|| Ion mz=#{mz} #{lipid.inspect} + #{modifications.map(&:inspect).join(', ')} ||>"
      end

    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'mspire/mass'
|
2
|
+
require 'mspire/molecular_formula'
|
3
|
+
|
4
|
+
module Mspire
  class Lipid

    # A chemical modification of a lipid (e.g. an adduct or a neutral loss),
    # described by a name, a formula, a mass difference and a charge.
    #
    # NOTE(review): an earlier comment said "all mods are gains unless the
    # name ends in an underscore"; nothing in this class inspects the name
    # that way (losses are requested with :loss => true) -- confirm the
    # convention is obsolete.
    class Modification

      # given a string with a formula and charge, returns the formula portion
      # and the charges (as a signed integer)
      #
      #     formula_and_charge("Li+")   # => ["Li", 1]
      #     formula_and_charge("H2++")  # => ["H2", 2]
      #     formula_and_charge("OH-")   # => ["OH", -1]
      #     formula_and_charge("H2O")   # => ["H2O", 0]
      def self.formula_and_charge(string)
        # trailing run of all plusses or all minuses is the charge
        md = string.match(/\A(.*?)(\++|-+)?\z/)
        charges_string = md[2]
        int =
          if charges_string.nil?
            0
          elsif charges_string.start_with?('-')
            -charges_string.size
          else
            charges_string.size
          end
        [md[1], int]
      end

      # calculates the mass diff.  For every positive charge the mass of an
      # electron is subtracted; for every negative charge the mass of an
      # electron is added.  If gain is false, then the mass diff will be
      # negative.
      def self.massdiff(formula, charge, gain=true)
        massdiff = Mspire::Mass.formula_to_exact_mass(formula)
        massdiff -= (charge * Mspire::Mass::ELECTRON) # + charge subtracts, - charge adds
        massdiff = -massdiff unless gain
        massdiff
      end

      # formulas of the named (common) modifications; their charges are kept
      # separately in CHARGE
      FORMULAS = {
        :proton => 'H',
        :ammonium => 'NH4',
        :lithium => 'Li',
        :water => 'H2O',
      }
      # charge of each named modification (as a gain)
      CHARGE = {
        :proton => 1,
        :ammonium => 1,
        :lithium => 1,
        :water => 0,
      }

      # determined by running each formula and charge through
      # Modification.massdiff (as a gain)
      MASSDIFFS = {}
      FORMULAS.each do |name, formula|
        MASSDIFFS[name] = self.massdiff(formula, CHARGE[name])
      end

      # as a symbol
      attr_accessor :name
      # as a MolecularFormula object
      attr_accessor :formula
      # negative indicates a loss
      attr_accessor :massdiff
      # the charge
      attr_accessor :charge

      # if no mass or formula is given then it searches common mods for the name
      # @param [Symbol] name the name of the mod
      # A number of opts are expected if they are not found in the FORMULAS,
      # CHARGE, or MASSDIFFS hashes:
      #
      #     attributes:
      #     :formula = the chemical formula, lipidmaps style ("C2H4BrO") or
      #                any valid argument to MolecularFormula.new
      #     :massdiff = +/-Float
      #     :charge = +/- Integer
      #
      #     instruction:
      #     :loss = true   negates the charge and makes the mass diff negative
      #                    during initialization
      #                    this option is typically only done for molecules
      #                    already present in the FORMULA hash (e.g.)
      #
      #         proton_loss = Mspire::Lipid::Modification.new(:proton, :loss => true)
      #         water_loss = Mspire::Lipid::Modification.new(:water, :loss => true)
      #
      def initialize(name, opts={})
        @name = name
        # stays nil when neither opts nor FORMULAS supply a formula
        @formula =
          if ( form_string = (opts[:formula] || FORMULAS[name]) )
            Mspire::MolecularFormula.new( form_string )
          end
        @massdiff = opts[:massdiff] || MASSDIFFS[name]
        @charge = opts[:charge] || CHARGE[name]

        if opts[:loss]
          @charge = -@charge
          # necessary if you are using a named molecule and you want its loss
          # rather than gain (i.e., you want a negative massdiff); .abs keeps
          # an already-negative :massdiff from being flipped into a gain
          @massdiff = -@massdiff.abs
        end
      end

      # the formula followed by one '+' or '-' per unit of charge
      def charged_formula
        sign = (@charge > 0) ? '+' : '-'
        @formula.to_s + (sign * @charge.abs)
      end

      # true if the modification adds mass
      def gain?
        massdiff > 0
      end

      def loss?
        !gain?
      end

      def inspect
        "<Mod: #{charged_formula}>"
      end

    end
  end
end
|
124
|
+
|
125
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'mspire/bin'
|
2
|
+
|
3
|
+
module Mspire
  class Lipid
    class Search

      # A Search::Bin is a range that collects the queries pushed into it
      # (see #<<) and holds a reference to the db spectrum they are searched
      # against (per the original note, the *entire* spectrum, not just the
      # portion covered by the range), plus a ProbabilityDistribution -- the
      # probability that a peak's delta to nearest peak is that small by
      # chance.
      class Bin < Mspire::Bin
        # the spectrum searched against; each of its intensity values is used
        # as the isobar group for the m/z at the same index
        attr_accessor :db_spectrum
        # used to assign pvalues to the top hits
        attr_accessor :probability_distribution

        # @param range_obj a Range-like object (begin, end, exclude_end?)
        # @param db_spectrum the spectrum to search against
        def initialize(range_obj, db_spectrum)
          super(range_obj.begin, range_obj.end, range_obj.exclude_end?)
          @db_spectrum = db_spectrum
        end

        # queues a query for the next call to queries_to_hit_groups!
        def <<(query)
          @data << query
        end

        # returns the nearest num_hits Mspire::Lipid::Search::Hits sorted by delta
        # [with tie going to the lower m/z]
        # searches all queries and removes them from the data queue
        def queries_to_hit_groups!(num_hits=1)
          queries = @data.dup
          @data.clear

          # best_hits reads the isobar groups from this ivar
          @db_isobar_groups_by_index = @db_spectrum.intensities

          hit_groups = queries.map do |query|
            best_hits(query, num_hits)
          end

          all_top_hits = hit_groups.map(&:first)

          # updates the pvalues for all the hits
          pvalues = probability_distribution.pvalues( all_top_hits )
          all_top_hits.zip(pvalues) {|hit, pvalue| hit.pvalue = pvalue }

          hit_groups
        end

        # returns a HitGroup object
        #
        # Looks at the db m/z values within (num_hits - 1) positions of the
        # nearest one and keeps the num_hits with the smallest delta.
        # Relies on @db_isobar_groups_by_index, which queries_to_hit_groups!
        # sets before calling this.
        def best_hits(query, num_hits)
          query_mz = query.mz
          #puts "MZ: #{query_mz}"
          db_mzs = @db_spectrum.mzs
          index = @db_spectrum.find_nearest_index(query_mz)
          # clamp the candidate window to the valid index range of db_mzs
          _min = [index - (num_hits-1), 0].max
          _max = [index + (num_hits-1), db_mzs.size - 1].min
          delta_index_pairs = (_min.._max).map {|i| [(query_mz - db_mzs[i]).abs, i] }
          # sorting [delta, index] pairs breaks delta ties by the lower index
          closest_delta_index_pairs = delta_index_pairs.sort
          top_num_hits_delta_index_pairs = closest_delta_index_pairs[0, num_hits]
          top_num_hit_indices = top_num_hits_delta_index_pairs.map(&:last)
          hit_group = top_num_hit_indices.map do |hit_index|
            Hit.new( :db_isobar_group => @db_isobar_groups_by_index[hit_index], :observed_mz => query_mz)
          end
          HitGroup.new(hit_group)
        end

        def inspect
          "<(#{super}) @db_spectrum(points size)=#{db_spectrum.mzs.size} @probability_distribution=#{probability_distribution}>"
        end

        # a plain Range with the same bounds as this bin
        def to_range
          Range.new( self.begin, self.end, self.exclude_end? )
        end
      end
    end
  end
end
|
78
|
+
|
79
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module Mspire
  class Lipid
    class Search
      # An Array of Lipid::Ion objects sharing the same (or possibly similar)
      # m/z.
      #
      # It is a container with its own mz attribute so that members *could*
      # have slightly different m/z values; in the current implementation
      # they have exactly the same m/z.
      class DBIsobarGroup < Array
        # the m/z representing the whole group
        attr_accessor :mz

        # @param ar [Array] the ions to copy into the group
        # @param mz the group's m/z (left unset when nil or false)
        def initialize(ar=[], mz=nil)
          replace(ar)
          @mz = mz if mz
        end
      end
    end
  end
end
|