ms-lipidomics 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/LICENSE +21 -0
- data/README.rdoc +13 -0
- data/Rakefile +60 -0
- data/bin/lipidomic-search.rb +126 -0
- data/lib/ms/lipid.rb +19 -0
- data/lib/ms/lipid/ion.rb +39 -0
- data/lib/ms/lipid/modification.rb +112 -0
- data/lib/ms/lipid/search.rb +203 -0
- data/lib/ms/lipid/search/bin.rb +79 -0
- data/lib/ms/lipid/search/db_isobar_group.rb +20 -0
- data/lib/ms/lipid/search/hit.rb +75 -0
- data/lib/ms/lipid/search/probability_distribution.rb +49 -0
- data/lib/ms/lipid/search/query.rb +23 -0
- data/lib/ms/lipid_maps.rb +31 -0
- data/spec/ms/lipid/ion_spec.rb +23 -0
- data/spec/ms/lipid/modification_spec.rb +41 -0
- data/spec/ms/lipid/search_spec.rb +79 -0
- data/spec/ms/lipid_maps_spec.rb +19 -0
- data/spec/ms/lipid_spec.rb +16 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/testfiles/lipidmaps_short.tsv +32 -0
- metadata +115 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2012 Brigham Young University
|
2
|
+
authored by: John T. Prince
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8

# Build, packaging, spec and rdoc tasks for the ms-lipidomics gem.

require 'rubygems'
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "ms-lipidomics"
  gem.homepage = "http://github.com/princelab/ms-lipidomics"
  gem.license = "MIT"
  gem.summary = %Q{mass spectrometry based lipidomics - especially shotgun lipidomics}
  gem.description = %Q{does ms lipidomics}
  gem.email = "jtprince@gmail.com"
  gem.authors = ["John T. Prince"]

  # Runtime dependencies are required when using the gem; development
  # dependencies are only needed for development (rake tasks, specs, etc):
  #   gem.add_runtime_dependency 'jabber4r', '> 0.1'
  #   gem.add_development_dependency 'rspec', '> 1.2.3'
  # NOTE(review): the library requires trollop, bin, rserve/simpler and the
  # ms/* files at runtime but declares no runtime dependencies -- confirm
  # the gem names and add them here.
  gem.add_development_dependency "rspec", "~> 2.3.0"
  gem.add_development_dependency "bundler", "~> 1.0.0"
  gem.add_development_dependency "jeweler", "~> 1.6.4"
  gem.add_development_dependency "rcov", ">= 0"
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in
  # the generated title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ms-lipidomics #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'trollop'
|
4
|
+
require 'ms/mzml'
|
5
|
+
require 'ms/lipid/search'
|
6
|
+
require 'ms/lipid/ion'
|
7
|
+
require 'ms/lipid/search/query'
|
8
|
+
require 'ms/lipid_maps'
|
9
|
+
|
10
|
+
# for html output: (just make the id clickable)
|
11
|
+
LIPIDMAPS_SEARCH = "http://www.lipidmaps.org/data/LMSDRecord.php?LMID="
|
12
|
+
|
13
|
+
DEFAULTS = {
|
14
|
+
:bin_width => 5,
|
15
|
+
:bin_unit => :ppm,
|
16
|
+
:search_unit => :ppm,
|
17
|
+
}
|
18
|
+
|
19
|
+
# One input file together with the single spectrum obtained by merging the
# scans selected from it.
class Sample
  # path of the file the spectrum was read from
  attr_accessor :file
  # the merged spectrum
  attr_accessor :spectrum

  # merge_opts are layered on top of DEFAULTS and handed to the merge step
  def initialize(file, merge_opts={})
    @file = file
    merge_settings = DEFAULTS.merge(merge_opts)
    @spectrum = merge_ms1_spectra(file, merge_settings)
  end

  # Collects every spectrum in +file+ with more than 1000 m/z values (a
  # stand-in for "ms level 1"), sorts each one in place and returns the
  # result of MS::Spectrum.merge over them.
  def merge_ms1_spectra(file, opts)
    warn "using number of peaks as proxy for ms level right now"
    ms1_scans = []
    MS::Mzml.foreach(file) do |scan|
      ms1_scans.push(scan) if scan.mzs.size > 1000 # kludge for ms_level == 1
    end
    ms1_scans.each(&:sort!)

    MS::Spectrum.merge(ms1_scans, opts)
  end
end
|
39
|
+
|
40
|
+
ext = ".lipidID.tsv"

parser = Trollop::Parser.new do
  banner "usage: #{File.basename(__FILE__)} [OPTIONS] <lipidmaps>.tsv <file>.mzML ..."
  text "output: <file>#{ext} ..."
  text ""
  text "note that sometimes you get an error from R like this:"
  text "(`eval': voidEval failed: Packet[cmd=2130771970,len=<nil>, con='<nil>', status=error...)"
  text "just re-run it and it will work"
  text ""
  opt :bin_width, "width of the bins for merging", :default => DEFAULTS[:bin_width]
  opt :bin_unit, "units for binning (ppm or amu)", :default => DEFAULTS[:bin_unit].to_s
  opt :search_unit, "unit for searching nearest hit (ppm or amu)", :default => DEFAULTS[:search_unit].to_s
  opt :top_n_peaks, "the number of highest intensity peaks to query the DB with", :default => 1000
  opt :display_n, "the number of best hits to display", :default => 20
  opt :verbose, "talk about it"
end

# Let Trollop report --help and malformed options itself (message + exit)
# instead of letting its control-flow exceptions escape as a backtrace.
opts = Trollop::with_standard_exception_handling(parser) { parser.parse(ARGV) }
opts[:bin_unit] = opts[:bin_unit].to_sym
opts[:search_unit] = opts[:search_unit].to_sym

# need the lipidmaps table plus at least one data file
if ARGV.size < 2
  parser.educate
  exit
end

(lipidmaps, *files) = ARGV

$VERBOSE = opts[:verbose]

proton = MS::Lipid::Modification.new(:proton)
h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)

lipids = MS::LipidMaps.parse_file(lipidmaps)

# every lipid is searched as [M+H]+ and as [M+H-H2O]+
ions = lipids.map do |lipid|
  [[proton], [proton, h2o_loss]].map do |mods|
    MS::Lipid::Ion.new(lipid, mods)
  end
end.flatten(1)

searcher = MS::Lipid::Search.new(ions, :ppm => (opts[:search_unit] == :ppm))

# columns taken from the best hit, then from the runner-up
hit_info = [:qvalue, :pvalue, :observed_mz, :theoretical_mz, :delta, :ppm]
second_hit_info = [:ppm]

files.each do |file|
  base = file.chomp(File.extname(file))
  puts "processing file: #{file}" if $VERBOSE
  sample = Sample.new(file, opts)

  num_points = sample.spectrum.mzs.size
  puts "#{num_points} merged peaks in #{file}" if $VERBOSE

  # keep the top_n_peaks most intense points, then restore m/z order
  highest_points = sample.spectrum.points.sort_by(&:last).reverse[0,opts[:top_n_peaks]].sort

  sample.spectrum = MS::Spectrum.from_points( highest_points )

  queries = sample.spectrum.mzs.each_with_index.map {|mz,index| MS::Lipid::Search::Query.new(mz, index) }
  hit_groups = searcher.search(queries, :return_order => :sorted)

  output = base + ext
  puts "writing to #{output}" if $VERBOSE
  File.open(output, 'w') do |out|
    out.puts((hit_info + %w(2nd_hit_ppm first_isobar_name num_isobars isobars)).join("\t"))
    hit_groups[0,opts[:display_n]].each do |hit_group|
      tophit = hit_group.first
      # a group holds a single hit when the database has no second
      # candidate; its columns are then left blank (nil joins as "")
      second_hit = hit_group[1]

      row = hit_info.map {|mthd| tophit.send(mthd) }
      row.concat(second_hit_info.map {|mthd| second_hit ? second_hit.send(mthd) : nil })

      first_lipid = tophit.db_isobar_group.first.lipid
      common_name = first_lipid.common_name
      common_name = first_lipid.systematic_name if common_name == "-"
      row.push common_name
      row.push tophit.db_isobar_group.size

      # "<lm_id>:<+/-(formula)...>" for every isobaric ion, space separated.
      # (Named distinctly so the database `ions` above is not overwritten.)
      isobar_descriptions = tophit.db_isobar_group.map do |ion|
        mods = ion.modifications.map do |mod|
          (mod.gain? ? '+' : '-') + "(#{mod.charged_formula})"
        end.join
        [ion.lipid.lm_id, mods].join(":")
      end.join(' ')
      row.push isobar_descriptions

      out.puts row.join("\t")
    end
  end
end
|
data/lib/ms/lipid.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
module MS
  # A single lipid record. The fields are positional and follow the order
  # returned by Lipid.members.
  class Lipid
    # the attribute names, in constructor (column) order
    def self.members
      [:lm_id,:common_name,:systematic_name,:formula,:mass,:category,:main_class,:sub_class]
    end

    members.each {|mem| attr_accessor mem }

    # takes the values in Lipid.members order; missing trailing values are nil
    def initialize(*args)
      (@lm_id,@common_name,@systematic_name,@formula,@mass,@category,@main_class,@sub_class) = args
    end

    # compact summary; the common name is truncated to 20 characters. A
    # missing common name is shown as an empty string rather than raising.
    def inspect
      name = common_name.to_s
      cut_common_name = (name.size <= 20) ? name : (name[0,20]+"...")
      "<#{lm_id}: #{formula}: #{mass} #{cut_common_name}>"
    end
  end
end
|
data/lib/ms/lipid/ion.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
module MS
  class Lipid
    # a lipid with modifications (typically the mods give it a charge so that
    # it can be seen in the mass spec)
    class Ion
      # an MS::Lipid object
      attr_accessor :lipid
      # an Array of modification objects (each responds to massdiff and charge)
      attr_accessor :modifications

      def initialize(lipid, mods=[])
        @lipid = lipid
        @modifications = mods
        @mz = nil
      end

      # The mass-to-charge ratio: (lipid mass + sum of mod massdiffs) divided
      # by the magnitude of the summed charge. Returns nil when the net
      # charge is zero. A non-nil result is cached, so it will not reflect
      # later changes to lipid or modifications.
      def mz
        return @mz if @mz
        # Float() accepts numeric strings and integers, so the division
        # below never truncates
        mass = Float(@lipid.mass)
        charge = 0
        @modifications.each do |mod|
          mass += mod.massdiff
          charge += mod.charge
        end
        # m/z is reported as a positive value for anions as well
        @mz = (charge == 0) ? nil : (mass / charge.abs)
      end

      def inspect
        "<|| Ion mz=#{mz} #{lipid.inspect} + #{modifications.map(&:inspect).join(', ')} ||>"
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'ms/mass'
|
2
|
+
|
3
|
+
module MS
|
4
|
+
class Lipid
|
5
|
+
|
6
|
+
|
7
|
+
# the convention is all mods are gains unless the name ends in an
|
8
|
+
# underscore
|
9
|
+
class Modification
|
10
|
+
|
11
|
+
# Splits a charged formula such as "Li+", "H2++" or "Cl-" into its formula
# and its signed charge. The charge is the number of trailing '+' (positive)
# or '-' (negative) characters; a string without trailing signs has charge 0.
#
# returns [formula_string, charge_integer]
def self.formula_and_charge(string)
  md = string.match(/\A(.*?)(\++|-+)?\z/m)
  signs = md[2]
  charge =
    if signs.nil?
      0
    elsif signs.start_with?('-')
      -signs.size
    else
      signs.size
    end
  [md[1], charge]
end
|
24
|
+
|
25
|
+
# Calculates the mass difference contributed by +formula+ carrying +charge+.
# For every positive charge the mass of an electron is subtracted; for every
# negative charge the mass of an electron is added. If gain is false the
# result is negated (i.e. the formula is lost rather than gained).
def self.massdiff(formula, charge, gain=true)
  massdiff = MS::Mass.formula_to_exact_mass(formula)
  massdiff -= (charge * MS::Mass::ELECTRON) # + charge subtracts, - charge adds
  gain ? massdiff : -massdiff
end
|
36
|
+
|
37
|
+
# the charge on the mod should be represented by the number of plusses
|
38
|
+
# or minuses after the formula (Li+ for a +1 charge Lithium or H2++, 2
|
39
|
+
# protons with a total of 2 charges)
|
40
|
+
FORMULAS = {
|
41
|
+
:proton => 'H',
|
42
|
+
:ammonium => 'NH3H',
|
43
|
+
:lithium => 'Li',
|
44
|
+
:water => 'H2O',
|
45
|
+
}
|
46
|
+
CHARGE = {
|
47
|
+
:proton => 1,
|
48
|
+
:ammonium => 1,
|
49
|
+
:lithium => 1,
|
50
|
+
:water => 0,
|
51
|
+
}
|
52
|
+
|
53
|
+
# computed by running each formula and its charge through Modification.massdiff
|
54
|
+
MASSDIFFS = {}
|
55
|
+
FORMULAS.each do |name, formula|
|
56
|
+
MASSDIFFS[name] = self.massdiff(formula, CHARGE[name])
|
57
|
+
end
|
58
|
+
|
59
|
+
# as a symbol
|
60
|
+
attr_accessor :name
|
61
|
+
# as a molecular formula
|
62
|
+
attr_accessor :formula
|
63
|
+
# negative indicates a loss
|
64
|
+
attr_accessor :massdiff
|
65
|
+
# the charge
|
66
|
+
attr_accessor :charge
|
67
|
+
|
68
|
+
# Builds a modification. Values not given in opts are looked up by name in
# the FORMULAS, CHARGE and MASSDIFFS hashes.
#
# @param [Symbol] name the name of the mod
#
# attributes (needed when the name is not one of the known mods):
#     :formula = the chemical formula, lipidmaps style ("C2H4BrO")
#     :massdiff = +/-Float (computed from :formula and :charge if omitted)
#     :charge = +/- Integer
#
# instruction:
#     :loss = true   flips the mass diff sign during initialization
#                    necessary to get negative massdiff on named molecule
#                    (unnecessary if you input massdiff manually)
#
# Raises ArgumentError if :loss is requested but no massdiff can be
# determined.
def initialize(name, opts={})
  @name = name
  @formula = opts[:formula] || FORMULAS[name]
  @charge = opts[:charge] || CHARGE[name]
  @massdiff = opts[:massdiff] || MASSDIFFS[name]
  # a custom mod described only by formula and charge still gets a massdiff
  if @massdiff.nil? && @formula && @charge
    @massdiff = self.class.massdiff(@formula, @charge)
  end
  if opts[:loss]
    if @massdiff.nil?
      raise ArgumentError, "cannot take the loss of #{name.inspect}: no :massdiff (or :formula and :charge) known"
    end
    # necessary if you are using a named molecule and you want its loss
    # rather than gain (i.e., you want a negative massdiff)
    @massdiff = -@massdiff
  end
end
|
91
|
+
|
92
|
+
def charged_formula
|
93
|
+
@formula + @charge.abs.times.map { (@charge > 0) ? '+' : '-' }.join
|
94
|
+
end
|
95
|
+
|
96
|
+
def gain?
|
97
|
+
massdiff > 0
|
98
|
+
end
|
99
|
+
|
100
|
+
def loss?
|
101
|
+
!gain?
|
102
|
+
end
|
103
|
+
|
104
|
+
def inspect
|
105
|
+
"<Mod: #{charged_formula}>"
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
|
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'ms/spectrum'
|
2
|
+
require 'rserve/simpler' # TODO: move to integrated interface with rserve when available
|
3
|
+
require 'core_ext/array/in_groups'
|
4
|
+
require 'ms/lipid/search/hit'
|
5
|
+
require 'ms/lipid/search/bin'
|
6
|
+
require 'ms/lipid/modification'
|
7
|
+
require 'ms/lipid/search/probability_distribution'
|
8
|
+
|
9
|
+
module MS
  class Lipid
    # Matches query m/z values against a database of lipid ions. The database
    # is split into m/z bins; each bin gets a probability distribution built
    # from random m/z values so that every nearest-ion match can be given a
    # p-value, and Benjamini-Hochberg style q-values are assigned across all
    # queries of a search.
    class Search
      STANDARD_MODIFICATIONS = {
        :proton => [1,2],
        :ammonium => [1],
        :lithium => [1],
        :water => [1,2],
      }
      STANDARD_SEARCH = {
        :units => :ppm,
        :query_min_count_per_bin => 500,  # min number of peaks per bin
        :num_rand_samples_per_bin => 1000,
        :num_nearest => 2,
        :return_order => :as_given,  # or :sorted
      }

      attr_accessor :options
      attr_accessor :search_function

      # Builds one MS::Lipid::Ion per lipid for every modification/count pair
      # in mods (e.g. :proton => [1,2] gives a singly and a doubly protonated
      # ion) and returns a new search object over them. Ions without an m/z
      # (zero net charge) are left out because they cannot be searched.
      #
      # gain_and_loss (combining one gain with one loss) is not implemented
      # and raises NotImplementedError.
      def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
        if gain_and_loss
          raise NotImplementedError, "combining a gain with a loss is not implemented yet"
        end
        # lazy-load: only this helper needs the Ion class
        require 'ms/lipid/ion'
        real_mods_and_cnts = mods.map {|name, cnts| [MS::Lipid::Modification.new(name), cnts] }
        ions = []
        lipids.each do |lipid|
          real_mods_and_cnts.each do |mod, counts|
            counts.each do |cnt|
              ion = MS::Lipid::Ion.new(lipid, Array.new(cnt, mod))
              ions << ion unless ion.mz.nil?
            end
          end
        end
        self.new(ions)
      end

      # ions are MS::Lipid::Ion objects
      # each one should give a non-nil m/z value
      # opts are merged over STANDARD_SEARCH
      def initialize(ions=[], opts={})
        @options = STANDARD_SEARCH.merge(opts)
        @db_isobar_spectrum = create_db_isobar_spectrum(ions)
        @search_function = create_search_function(ions, @options)
      end

      # Runs the queries through the search function and assigns q-values
      # (BH derived for now) to the best hit of every hit group.
      #
      # Returns the hit groups as produced by the search function when
      # :return_order is :as_given (the default) or ordered by ascending
      # p-value when it is :sorted. Raises ArgumentError for any other value.
      # assumes search_queries are in ascending m/z order
      def search(search_queries, opts={})
        opt = @options.merge( opts )
        hit_groups = @search_function.call(search_queries, opt[:num_nearest])
        sorted_hit_groups = qvalues!(hit_groups, opt)
        # use the merged options so the configured default applies
        case opt[:return_order]
        when :as_given
          hit_groups
        when :sorted
          sorted_hit_groups
        else
          raise ArgumentError, "unknown :return_order #{opt[:return_order].inspect} (expected :as_given or :sorted)"
        end
      end

      # Sets the q-value on the best hit of each hit group and returns the
      # hit groups sorted by [pvalue, |delta|, |ppm|, original index].
      # Raises if any p-value is NaN.
      def qvalues!(hit_groups, opts)
        # from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
        num_total_tests = hit_groups.size

        # the original index keeps the sort from ever comparing two hit
        # groups with each other
        pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

        if pval_hg_index_tuples.any? {|tuple| tuple.first.respond_to?(:nan?) && tuple.first.nan? }
          $stderr.puts "pvalue of NaN!"
          $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
          raise "pvalue of NaN!"
        end

        sorted_pval_index_tuples = pval_hg_index_tuples.sort

        prev_bh_value = 0
        sorted_pval_index_tuples.each_with_index do |tuple,i|
          pval = tuple.first
          bh_value = pval * num_total_tests / (i + 1)
          # Sometimes this correction can give values greater than 1,
          # so we set those values at 1
          bh_value = [bh_value, 1].min

          # To preserve monotonicity in the values, we take the
          # maximum of the previous value or this one, so that we
          # don't yield a value less than the previous.
          bh_value = [bh_value, prev_bh_value].max
          prev_bh_value = bh_value
          tuple.last.first.qvalue = bh_value # give the top hit the q-value
        end

        sorted_pval_index_tuples.map(&:last)
      end

      # Prepares the binned database and returns a lambda
      #   (search_queries, num_nearest_hits) -> array of hit groups
      #
      # ppm deviations are used when opt[:ppm] is truthy; when the :ppm key
      # is absent the decision falls back to opt[:units] == :ppm.
      def create_search_function(ions, opt)
        db_isobar_spectrum = create_db_isobar_spectrum(ions)

        search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])

        use_ppm = opt.key?(:ppm) ? opt[:ppm] : (opt[:units] == :ppm)
        create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], use_ppm)

        # create the actual search function
        # returns an array of hit_groups
        lambda do |search_queries, num_nearest_hits|
          num_hits = num_nearest_hits || opt[:num_nearest]
          Bin.bin(search_bins, search_queries, &:mz)
          search_bins_with_data = search_bins.reject {|bin| bin.data.empty? }
          search_bins_with_data.map {|bin| bin.queries_to_hit_groups!(num_hits) }.flatten(1)
        end
      end

      #####################################################
      # Ancillary to create_search_function:
      #####################################################

      # returns a DB isobar spectrum where the m/z values are all the m/z
      # values to search for and the intensities each an array corresponding
      # to all the lipid ions matching that m/z value
      def create_db_isobar_spectrum(ions)
        mzs = [] ; isobar_groups = []
        ions.group_by(&:mz).sort_by(&:first).each do |mz, group|
          mzs << mz
          isobar_groups << group
        end
        MS::Spectrum.new([mzs, isobar_groups])
      end

      # Gives every search bin a probability distribution fitted to the
      # distance between random m/z values drawn from the bin's range and
      # their nearest database m/z.
      # use_ppm uses ppm or amu if false
      # returns the search_bins
      def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
        rng = Random.new
        search_bins.each do |search_bin|
          random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range) }
          # find the deltas
          diffs = random_mzs.map do |random_mz|
            nearest_random_mz = db_isobar_spectrum.find_nearest(random_mz)
            delta = (random_mz - nearest_random_mz).abs
            use_ppm ? (delta / nearest_random_mz) * 1e6 : delta
          end
          search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs)
        end
        search_bins
      end

      # Splits the database points into as many equally sized groups as
      # possible while keeping at least min_n_per_bin points per group, and
      # returns one Search::Bin per group. All bins but the last exclude
      # their end (it is the start of the next bin); the last bin is
      # inclusive.
      def create_search_bins(db_isobar_spectrum, min_n_per_bin)
        ss = db_isobar_spectrum.mzs.size
        raise 'I think you need some data in your query spectrum!' if ss == 0

        # make sure we get the right bin size based on the input
        optimal_num_groups = 1
        (1..ss).each do |divisions|
          break unless (ss.to_f / divisions) >= min_n_per_bin
          optimal_num_groups = divisions
        end

        groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a

        case groups.size
        when 0
          raise 'I think you need some data in your query spectrum!'
        when 1
          group = groups.first
          [ MS::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), db_isobar_spectrum ) ]
        else
          search_bins = groups.each_cons(2).map do |points1, points2|
            MS::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), db_isobar_spectrum )
          end
          last_group = groups.last
          _range = Range.new(last_group.first.first, last_group.last.first)
          search_bins << MS::Lipid::Search::Bin.new(_range, db_isobar_spectrum) # inclusive
        end
      end
    end
  end
end
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'bin'
|
2
|
+
|
3
|
+
module MS
  class Lipid
    class Search

      # A Search::Bin is an m/z range that also carries the *entire* DB
      # spectrum (not just the portion covered by the range) and a
      # ProbabilityDistribution -- the probability that a peak's delta to
      # nearest peak is that small by chance.
      class Bin < ::Bin
        # the spectrum searched against; each intensity slot holds the group
        # of DB ions sharing that m/z
        attr_accessor :db_spectrum
        attr_accessor :probability_distribution

        def initialize(range_obj, db_spectrum)
          super(range_obj.begin, range_obj.end, range_obj.exclude_end?)
          @db_spectrum = db_spectrum
        end

        # queues a query on this bin
        def <<(query)
          @data << query
        end

        # Searches every queued query, empties the queue and returns one
        # HitGroup per query (each holding up to num_hits hits sorted by
        # delta, with tie going to the lower m/z). The best hit of every
        # group gets its pvalue from the bin's probability distribution.
        def queries_to_hit_groups!(num_hits=1)
          queries = @data.dup
          @data.clear
          return [] if queries.empty?

          hit_groups = queries.map {|query| best_hits(query, num_hits) }

          all_top_hits = hit_groups.map(&:first)

          # updates the pvalues for all the top hits.
          # NOTE(review): Array() guards against the R bridge handing back a
          # bare number for a single deviation -- confirm against rserve-simpler
          pvalues = Array(probability_distribution.pvalues( all_top_hits ))
          all_top_hits.zip(pvalues) {|hit, pvalue| hit.pvalue = pvalue }

          hit_groups
        end

        # returns a HitGroup object with the (up to) num_hits DB entries
        # nearest to the query m/z
        def best_hits(query, num_hits)
          query_mz = query.mz
          db_mzs = @db_spectrum.mzs
          isobar_groups = @db_spectrum.intensities
          nearest = @db_spectrum.find_nearest_index(query_mz)
          # candidate window around the nearest entry, clamped to the DB
          first_index = [nearest - (num_hits-1), 0].max
          last_index = [nearest + (num_hits-1), db_mzs.size - 1].min
          delta_index_pairs = (first_index..last_index).map {|i| [(query_mz - db_mzs[i]).abs, i] }
          hits = delta_index_pairs.sort.first(num_hits).map do |_delta, i|
            Hit.new( :db_isobar_group => isobar_groups[i], :observed_mz => query_mz)
          end
          HitGroup.new(hits)
        end

        def inspect
          "<(#{super}) @db_spectrum(points size)=#{db_spectrum.mzs.size} @probability_distribution=#{probability_distribution}>"
        end

        def to_range
          Range.new( self.begin, self.end, self.exclude_end? )
        end
      end
    end
  end
end
|
78
|
+
|
79
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module MS
  class Lipid
    class Search
      # An Array of Lipid::Ion objects that all have the same (or possibly
      # similar) m/z.
      class DBIsobarGroup < Array
        # The m/z is stored separately from the members so that the group
        # *could* hold individuals with slightly different m/z values and
        # still be used as a container. In the current implementation the
        # members have exactly the same m/z.
        attr_accessor :mz

        # ar supplies the members; mz is only recorded when one is given
        def initialize( ar=[], mz=nil)
          mz && (@mz = mz)
          replace(ar)
        end
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
|
2
|
+
module MS
  class Lipid
    class Search
      # The pairing of one observed m/z with one group of isobaric DB ions.
      class Hit
        # the db_isobar_group this hit is associated with. Each hit is only
        # associated with a single db_isobar_group!
        attr_accessor :db_isobar_group
        # the experimental m/z value
        attr_accessor :observed_mz
        # the probability the hit is due to random chance
        attr_accessor :pvalue
        # the FDR if the threshold accepts this pvalue. Note that this value
        # is relative to the number of tests performed and not completely
        # intrinsic to the hit itself.
        attr_accessor :qvalue

        # the probability distribution that can be used to determine its
        # pvalue
        attr_accessor :probability_distribution

        # each key of the hash becomes the instance variable of that name
        def initialize(hash={})
          hash.each {|k,v| instance_variable_set("@#{k}", v) }
        end

        # observed m/z - theoretical m/z (signed)
        def delta
          @observed_mz - theoretical_mz.to_f
        end

        alias_method :amu, :delta

        # the absolute value of distance from true val
        def delta_abs
          delta.abs
        end

        # parts per million (divided by theoretical m/z)
        def ppm
          (delta / theoretical_mz) * 1e6
        end

        # the m/z of the first ion in the db_isobar_group
        def theoretical_mz
          @db_isobar_group.first.mz
        end

        def inspect
          "<<#{super} -- <ppm=#{ppm} delta=#{delta} theoretical_mz=#{theoretical_mz}>>"
        end
      end

      # A query that matched multiple items. Each search returns a hit group
      # which consists of the best hits for that experimental m/z. When
      # queried for values like delta or ppm, it will delegate to the first hit.
      # So, in many ways it can be used as a container for hits, but it puts
      # its best face forward.
      class HitGroup < Array

        # should implement with delegator obviously...
        # should allow setting ???

        def delta() first.delta end
        def ppm() first.ppm end
        def theoretical_mz() first.theoretical_mz end
        def db_isobar_group() first.db_isobar_group end
        # older name for db_isobar_group, kept for callers that still use it
        def query_group() first.db_isobar_group end
        def observed_mz() first.observed_mz end
        def pvalue() first.pvalue end
        def qvalue() first.qvalue end

        def best_hit() first end

      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
|
2
|
+
module MS
  class Lipid
    class Search
      # A generalized extreme value distribution (fitted in R over the log of
      # a set of deviations) used to turn a hit's deviation into a p-value.
      class ProbabilityDistribution
        DEFAULT_TYPE = :ppm
        # the shared connection to R
        R = Rserve::Simpler.new

        # takes location, scale and shape parameters
        attr_accessor :location, :scale, :shape
        # the name of the hit method that supplies the deviation
        # (e.g. :ppm, :amu or :delta_abs)
        attr_accessor :type
        def initialize(location, scale, shape, type=DEFAULT_TYPE)
          @location, @scale, @shape = location, scale, shape
          @type = type
        end

        # takes a hit and returns the pvalue of its deviation. The magnitude
        # of the deviation is used (as in pvalues), since the log of a
        # negative deviation is not a number.
        def pvalue(hit)
          R.converse "pgev(log(#{hit.send(type).abs}), #{@location}, #{@scale}, #{@shape})"
        end

        # same as pvalue, just tries to limit the number of calls to R to
        # speed things up! Always returns an Array, one pvalue per hit.
        def pvalues(hits)
          return [] if hits.empty?
          deltas = hits.map {|v| v.send(type).abs }
          # NOTE(review): Array() guards against a bare number coming back
          # for a single deviation -- confirm against rserve-simpler
          Array(R.converse("sapply(r_devs, function(elt) pgev(log(elt), #{@location}, #{@scale}, #{@shape}))", :r_devs => deltas))
        end

        # loads an R library, raising if the reply suggests it did not load
        def self.require_r_library(lib)
          reply = R.converse "library(#{lib})"
          unless reply.size > 4  # ~roughly
            $stderr.puts "The libraries ismev and evd must be installed in your R env!"
            $stderr.puts "From within R (works best if R is started with sudo or root for installing):"
            $stderr.puts %Q{install.packages("ismev") ; install.packages("evd")}
            raise "must have R (rserve) and ismev and evd installed!"
          end
        end

        # Fits the distribution to log(devs) with R's gev.fit and returns a
        # ProbabilityDistribution of the given type built from the three
        # fitted parameters.
        def self.deviations_to_probability_distribution(type, devs)
          %w(ismev evd).each {|lib| require_r_library(lib) }
          params = R.converse("m <- gev.fit(log(devs_r))\n c(m$mle[1], m$mle[2], m$mle[3])", :devs_r => devs )
          self.new(*params, type)
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
module MS
|
5
|
+
class Lipid
|
6
|
+
class Search
|
7
|
+
# Simple value object pairing an observed m/z with the index of the search
# spectrum that the m/z was derived from.
class Query
|
8
|
+
|
9
|
+
# the experimentally observed lowest mz
|
10
|
+
attr_accessor :mz
|
11
|
+
|
12
|
+
# the index of search spectrum that the m/z was derived from
|
13
|
+
# this allows for the creation of an isotope envelope starting from a
|
14
|
+
# particular m/z value.
|
15
|
+
attr_accessor :index
|
16
|
+
|
17
|
+
# mz:: the observed m/z value
# index:: the index of the search spectrum the m/z came from
def initialize(mz, index)
|
18
|
+
@mz, @index = mz, index
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'ms/lipid'
|
2
|
+
require 'ms/mass'
|
3
|
+
|
4
|
+
module MS
  module LipidMaps
    # Parses a tab-delimited lipidmaps file and returns an array of Lipids.
    #
    # lipidmaps_tsv:: path to the tab-separated file.  The first line that
    #                 has a formula column is taken to be the header; a
    #                 warning is issued if it does not match
    #                 MS::Lipid.members.
    # high_res_mass:: if true (default), the formula is used to calculate a
    #                 higher resolution mass than what is in lipidmaps.  If
    #                 false, the mass column of the file is converted to a
    #                 Float (text that is not numeric becomes 0.0) so that
    #                 the mass is a number in both cases.
    # skip_clas_defs:: currently unused; kept so existing callers keep working.
    #
    # Lines whose fourth column (the formula) holds no capital letter are
    # skipped.
    def self.parse_file(lipidmaps_tsv, high_res_mass=true, skip_clas_defs=true)
      seen_header = false
      IO.foreach(lipidmaps_tsv).map do |line|
        pieces = line.chomp.split("\t")
        # no formula (this also covers blank lines): nothing to build
        next unless pieces[3].to_s =~ /[A-Z]/

        unless seen_header
          seen_header = true
          warn "lipidmaps column headers are not right!" unless pieces.map(&:downcase) == MS::Lipid.members.map(&:to_s)
          next
        end

        pieces[4] =
          if high_res_mass
            MS::Mass.formula_to_exact_mass(pieces[3])
          else
            pieces[4].to_f
          end
        MS::Lipid.new(*pieces)
      end.compact
    end
  end
end
|
30
|
+
|
31
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
require 'ms/lipid'
|
5
|
+
require 'ms/lipid/modification'
|
6
|
+
require 'ms/lipid/ion'
|
7
|
+
|
8
|
+
# Checks the m/z of singly and doubly protonated ions (each with a water
# loss) built from a lipid of mass 300.2.
describe MS::Lipid::Ion do
  before do
    lipid = MS::Lipid.new
    lipid.mass = 300.2
    proton = MS::Lipid::Modification.new(:proton)
    h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)
    @plus1 = MS::Lipid::Ion.new(lipid, [proton, h2o_loss])
    @plus2 = MS::Lipid::Ion.new(lipid, [proton, proton, h2o_loss])
  end

  it 'calculates the correct m/z' do
    # the tolerance must be small (1e-5); the former 1e5 accepted any value
    @plus1.mz.should be_within(1e-5).of(283.196711735)
    @plus2.mz.should be_within(1e-5).of(142.101994085)
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid/modification'
|
4
|
+
|
5
|
+
# Exercises the predefined modifications (:water, :proton, :ammonium) and
# the creation of a custom modification.
describe MS::Lipid::Modification do
  Mod = MS::Lipid::Modification

  it 'can create common mods easily' do
    # water loss
    water_loss = Mod.new(:water, :loss => true)
    water_loss.loss?.should be_true
    water_loss.massdiff.<(0).should be_true
    water_loss.charge.should == 0
    water_loss.charged_formula.should == 'H2O'

    # proton gain
    prot = Mod.new(:proton)
    prot.gain?.should be_true
    prot.massdiff.>(0).should be_true
    prot.charge.should == 1
    prot.charged_formula.should == 'H+'

    # ammonium gain
    ammon = Mod.new(:ammonium)
    ammon.gain?.should be_true
    ammon.massdiff.>(0).should be_true
    ammon.charge.should == 1
    ammon.charged_formula.should == 'NH3H+'
  end

  it 'can create custom mods' do
    # an unknown name yields a blank modification to be filled in by hand
    mymod = Mod.new(:super_snazzy)
    mymod.formula.should be_nil
    mymod.massdiff.should be_nil
    mymod.charge.should be_nil

    mymod.formula = 'CH4'
    mymod.charge = 2
    mymod.massdiff = MS::Lipid::Modification.massdiff(mymod.formula, mymod.charge)
    # the tolerance must be small (1e-4); the former 1e4 accepted any value
    mymod.massdiff.should be_within(1e-4).of(16.030202)
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid_maps'
|
4
|
+
require 'ms/lipid/search'
|
5
|
+
require 'ms/lipid/search/query'
|
6
|
+
require 'ms/lipid/modification'
|
7
|
+
|
8
|
+
# Specs for searching observed m/z values against ions derived from
# lipidmaps entries.
describe MS::Lipid::Search do
  before do
    @proton = MS::Lipid::Modification.new(:proton)
    @h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)
  end

  describe 'searching a section of lipid maps' do
    before do
      @lipids = MS::LipidMaps.parse_file(TESTFILES + '/lipidmaps_short.tsv')
      # two ions per lipid: [M+H]+ and [M+H-H2O]+
      @ions = @lipids.map do |lipid|
        [[@proton], [@proton, @h2o_loss]].map do |mods|
          MS::Lipid::Ion.new(lipid, mods)
        end
      end.flatten(1)
      # each sample is [m/z values, intensities]
      @samples = Hash[ {
        :sample1 => [[187.1633, 244.22, 616.51, 717.50],
                     [100, 200, 100, 200]],
        :sample2 => [[187.164, 396.15, 244.24, 347.28, 618.502],
                     [110, 210, 110, 210, 110]],
        :sample3 => [[187.160, 396.28, 244.24, 347.263, 618.511],
                     [120, 220, 120, 220, 120]],
        :sample4 => [[187.157, 396.20, 244.30, 618.22, 933.01],
                     [30, 33, 38, 99, 22]],
      }.map {|key,data| [key, MS::Spectrum.new(data)] } ]
      @pretend_search_mzs = [187.157, 396.20, 244.30, 618.22, 933.01]
    end

    xit 'creates a query search spectrum' do
      #spec = .create_query_search_spectrum(@ions)
      #spec.mzs.any? {|mz| mz.nil? }.should be_false
      #spec.mzs.size.should == 56
      #spec.intensities.map(&:size).count(2).should == 4
      #spec.intensities.map(&:size).count(1).should == 52
    end

    xit 'creates a probability function' do
      #subject.create_search_function(@ions, :prob_min_bincnt => 20)
    end

    xit 'searches mz values' do
      searcher = MS::Lipid::Search.new(@ions, :query_min_count_per_bin => 8, :num_rand_samples_per_bin => 1000, :ppm => false)
      num_nearest_hits = 3
      (hit_groups, qvals) = searcher.search(@pretend_search_mzs, num_nearest_hits)
      p hit_groups.map(&:first).map(&:pvalue)
    end
  end

  describe 'searching a full lipid maps' do

    before do
      # this will be specific to your install since it's not part of install
      path_to_lipidmaps_db = "#{ENV['HOME']}/tmp/tamil/lipidmaps_20120103_classes_1_2_3_4_5_6_7_8.exact_mass.tsv"
      @lipids = MS::LipidMaps.parse_file(path_to_lipidmaps_db)
      # the searcher is built from ions (lipid + modifications), just as in
      # the short-database setup; Search::Query takes (mz, index) and is
      # not the right object to build here
      @ions = @lipids.map do |lipid|
        [[@proton], [@proton, @proton], [@proton, @h2o_loss]].map do |mods|
          MS::Lipid::Ion.new(lipid, mods)
        end
      end.flatten(1)
      @pretend_search_mzs = [187.157, 396.20, 244.30, 618.22, 933.01]
    end

    it 'returns hit groups parallel with input m/zs' do
      searcher = MS::Lipid::Search.new(@ions, :query_min_count_per_bin => 1000, :ppm => false)
      hit_groups = searcher.search(@pretend_search_mzs, 3)
      best_hits = hit_groups.map(&:best_hit)
      best_hits.map {|hit| hit.observed_mz }.should == @pretend_search_mzs
    end

    it 'works with :ppm => true'

  end

end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid_maps'
|
4
|
+
|
5
|
+
# Parses the bundled lipidmaps_short.tsv fixture and checks the number of
# lipids returned and the fields of the last one.
describe MS::LipidMaps do
|
6
|
+
|
7
|
+
before do
|
8
|
+
@tfile = TESTFILES + '/lipidmaps_short.tsv'
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'parses lipid maps files' do
|
12
|
+
lipids = MS::LipidMaps.parse_file(@tfile)
|
13
|
+
# the fixture has 32 lines: 1 header + 31 entries, and
lipids.size.should == 30 # one is rejected for no formula
|
14
|
+
# the last entry of the fixture must come through with its fields intact
ll = lipids.last
|
15
|
+
ll.sub_class.should == 'Isoflavonoids [PK1205]'
|
16
|
+
ll.lm_id.should == "LMPK12050388"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid'
|
4
|
+
|
5
|
+
# Checks that an MS::Lipid can be built by splatting an array of field values.
describe MS::Lipid do
|
6
|
+
|
7
|
+
before do
|
8
|
+
# seven values only: nothing is supplied for the trailing sub_class field
@data = ['LMFA00000007', 'n-decanohydroxamic acid', 'N-hydroxydecanamide', 'C10H21NO2', 187.16, 'Fatty Acyls [FA]', 'Other Fatty Acyls [FA00]']
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'can be initialized with an array' do
|
12
|
+
lipid = MS::Lipid.new(*@data)
|
13
|
+
# the fifth value (index 4) is the mass
lipid.mass.should == @data[4]
|
14
|
+
# no value was given for sub_class, so it is expected to be nil
lipid.sub_class.should == nil
|
15
|
+
end
|
16
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
# Requires supporting files with custom matchers and macros, etc,
|
6
|
+
# in ./support/ and its subdirectories.
|
7
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
8
|
+
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
TESTFILES = File.expand_path(File.join(File.dirname(__FILE__), "testfiles"))
|
@@ -0,0 +1,32 @@
|
|
1
|
+
LM_ID COMMON_NAME SYSTEMATIC_NAME FORMULA MASS CATEGORY MAIN_CLASS SUB_CLASS
|
2
|
+
LMFA00000007 n-decanohydroxamic acid N-hydroxydecanamide C10H21NO2 187.16 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
3
|
+
LMFA00000008 (9S,10S)-10-hydroxy-9-(phosphonooxy)octadecanoic acid (9S,10S)-10-hydroxy-9-(phosphonooxy)octadecanoic acid C18H37O7P 396.23 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
4
|
+
LMFA00000009 N-(6-aminohexanoyl)-6-aminohexanoic acid 6-(6-aminohexanamido)hexanoic acid C12H24N2O3 244.18 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
5
|
+
LMFA00000014 Virodhamine 2-aminoethyl-5Z,8Z,11Z,14Z-eicosatetraenoate C22H37NO2 347.28 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
6
|
+
MGL02010062 DG(16:0/20:3(8Z,11Z,14Z)/0:0)[iso2] 1-hexadecanoyl-2-(8Z,11Z,14Z-eicosatrienoyl)-sn-glycerol C39H70O5 618.52 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
7
|
+
LMGL02010063 DG(18:2(9Z,12Z)/18:2(9Z,12Z)/0:0) 1,2-di-(9Z,12Z-octadecadienoyl)-sn-glycerol C39H68O5 616.51 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
8
|
+
LMGL02010064 DG(18:1(9Z)/18:3(9Z,12Z,15Z)/0:0)[iso2] 1-(9Z-octadecenoyl)-2-(9Z,12Z,15Z-octadecatrienoyl)-sn-glycerol C39H68O5 616.51 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
9
|
+
LMGL02010065 DG(18:0/19:0/0:0)[iso2] 1-octadecanoyl-2-nonadecanoyl-sn-glycerol C40H78O5 638.58 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
10
|
+
MGP01011596 PC(18:1(9Z)/13:0) 1-(9Z-octadecenoyl)-2-tridecanoyl-glycero-3-phosphocholine C39H76NO8P 717.53 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
11
|
+
LMGP01011597 PC(18:1(9Z)/14:1(9Z)) 1-(9Z-octadecenoyl)-2-(9Z-tetradecenoyl)-glycero-3-phosphocholine C40H76NO8P 729.53 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
12
|
+
LMGP01011598 PC(18:1(9Z)/15:0) 1-(9Z-octadecenoyl)-2-pentadecanoyl-glycero-3-phosphocholine C41H80NO8P 745.56 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
13
|
+
LMGP01011599 PC(18:1(9Z)/15:1(9Z)) 1-(9Z-octadecenoyl)-2-(9Z-pentadecenoyl)-glycero-3-phosphocholine C41H78NO8P 743.55 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
14
|
+
LMGP01011600 PC(18:1(9Z)/17:0) 1-(9Z-octadecenoyl)-2-heptadecanoyl-glycero-3-phosphocholine C43H84NO8P 773.59 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
15
|
+
MSP0502AN07 - Galalpha1-3Galalpha1-3Galalpha1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/24:1(15Z)) C78H141NO33 1619.94 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
16
|
+
LMSP0502AN08 - Galalpha1-3Galalpha1-3Galalpha1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/26:1(17Z)) C80H145NO33 1647.97 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
17
|
+
LMSP0502AO00 - Galbeta1-3Galalpha1-4Galbeta1-4Glcbeta-Cer - - Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
18
|
+
LMSP0502AO01 - Galbeta1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/16:0) C58H107NO23 1185.72 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
19
|
+
MST02020021 Dehydroepiandrosterone 3beta-hydroxyandrost-5-en-17-one C19H28O2 288.21 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
20
|
+
LMST02020023 Epiandrosterone 3beta-hydroxy-androstan-17-one C19H30O2 290.22 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
21
|
+
LMST02020025 Fluoxymesterone 9alpha-fluoro-11beta,17beta-dihydroxy-17-methylandrost-4-en-3-one C20H29FO3 336.21 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
22
|
+
LMST02020026 Formyldienolone 11alpha,17beta-dihydroxy-17-methyl-3-oxoandrosta-1,4-diene-2-carboxaldehyde C21H28O4 344.20 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
23
|
+
LMST02020027 Mestanolone 17beta-hydroxy-17-methyl-androstan-3-one C20H32O2 304.24 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
24
|
+
MPR03010008 Decaprenol 3,7,11,15,19,23,27,31,35,39-decamethyltetraconta-2Z,6Z,10Z,14Z,18Z,22Z,26Z,30Z,34E,38-decaen-1-ol C50H82O 698.64 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenols [PR0301]
|
25
|
+
LMPR03020001 undecaprenyl phosphate 3,7,11,15,19,23,27,31,35,39,43-undecamethyltetratetraconta-2Z,6Z,10Z,14Z,18Z,22Z,26Z,30Z,34E,38E,42-undecaen-1-yl phosphate C55H91O4P 846.67 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
26
|
+
LMPR03020002 Undecaprenyl phosphate mannose Alpha-mannosyl-undecaprenyl phosphate C61H101O9P 1008.72 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
27
|
+
LMPR03020004 Dodecaprenyl phosphate-galacturonic acid - C66H107O10P 1090.76 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
28
|
+
MPK12050384 Irisolidone 5,7-Dihydroxy-6,4'-dimethoxyisoflavone C17H14O6 314.08 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
29
|
+
LMPK12050385 Tectorigenin 5,7,4'-Trihydroxy-6-methoxyisoflavone C16H12O6 300.06 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
30
|
+
LMPK12050386 6-Hydroxygenistein - C15H10O6 286.05 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
31
|
+
LMPK12050387 Irisolone 4'-Hydroxy-5-methoxy-6,7-methylenedioxyisoflavone C17H12O6 312.06 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
32
|
+
LMPK12050388 7-O-Methyltectorigenin 5,4'-Dihydroxy-6,7-dimethoxyisoflavone C17H14O6 314.08 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ms-lipidomics
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- John T. Prince
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &7075680 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.3.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *7075680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bundler
|
27
|
+
requirement: &7075180 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.0.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *7075180
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: jeweler
|
38
|
+
requirement: &7074680 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.6.4
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *7074680
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rcov
|
49
|
+
requirement: &7074200 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *7074200
|
58
|
+
description: does ms lipidomics
|
59
|
+
email: jtprince@gmail.com
|
60
|
+
executables:
|
61
|
+
- lipidomic-search.rb
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files:
|
64
|
+
- LICENSE
|
65
|
+
- README.rdoc
|
66
|
+
files:
|
67
|
+
- .document
|
68
|
+
- .rspec
|
69
|
+
- LICENSE
|
70
|
+
- README.rdoc
|
71
|
+
- Rakefile
|
72
|
+
- bin/lipidomic-search.rb
|
73
|
+
- lib/ms/lipid.rb
|
74
|
+
- lib/ms/lipid/ion.rb
|
75
|
+
- lib/ms/lipid/modification.rb
|
76
|
+
- lib/ms/lipid/search.rb
|
77
|
+
- lib/ms/lipid/search/bin.rb
|
78
|
+
- lib/ms/lipid/search/db_isobar_group.rb
|
79
|
+
- lib/ms/lipid/search/hit.rb
|
80
|
+
- lib/ms/lipid/search/probability_distribution.rb
|
81
|
+
- lib/ms/lipid/search/query.rb
|
82
|
+
- lib/ms/lipid_maps.rb
|
83
|
+
- spec/ms/lipid/ion_spec.rb
|
84
|
+
- spec/ms/lipid/modification_spec.rb
|
85
|
+
- spec/ms/lipid/search_spec.rb
|
86
|
+
- spec/ms/lipid_maps_spec.rb
|
87
|
+
- spec/ms/lipid_spec.rb
|
88
|
+
- spec/spec_helper.rb
|
89
|
+
- spec/testfiles/lipidmaps_short.tsv
|
90
|
+
homepage: http://github.com/princelab/ms-lipidomics
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
105
|
+
requirements:
|
106
|
+
- - ! '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 1.8.15
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: mass spectrometry based lipidomics - especially shotgun lipidomics
|
115
|
+
test_files: []
|