ms-lipidomics 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/LICENSE +21 -0
- data/README.rdoc +13 -0
- data/Rakefile +60 -0
- data/bin/lipidomic-search.rb +126 -0
- data/lib/ms/lipid.rb +19 -0
- data/lib/ms/lipid/ion.rb +39 -0
- data/lib/ms/lipid/modification.rb +112 -0
- data/lib/ms/lipid/search.rb +203 -0
- data/lib/ms/lipid/search/bin.rb +79 -0
- data/lib/ms/lipid/search/db_isobar_group.rb +20 -0
- data/lib/ms/lipid/search/hit.rb +75 -0
- data/lib/ms/lipid/search/probability_distribution.rb +49 -0
- data/lib/ms/lipid/search/query.rb +23 -0
- data/lib/ms/lipid_maps.rb +31 -0
- data/spec/ms/lipid/ion_spec.rb +23 -0
- data/spec/ms/lipid/modification_spec.rb +41 -0
- data/spec/ms/lipid/search_spec.rb +79 -0
- data/spec/ms/lipid_maps_spec.rb +19 -0
- data/spec/ms/lipid_spec.rb +16 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/testfiles/lipidmaps_short.tsv +32 -0
- metadata +115 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2012 Brigham Young University
|
2
|
+
authored by: John T. Prince
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
|
6
|
+
require 'jeweler'
|
7
|
+
Jeweler::Tasks.new do |gem|
|
8
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
9
|
+
gem.name = "ms-lipidomics"
|
10
|
+
gem.homepage = "http://github.com/princelab/ms-lipidomics"
|
11
|
+
gem.license = "MIT"
|
12
|
+
gem.summary = %Q{mass spectrometry based lipidomics - especially shotgun lipidomics}
|
13
|
+
gem.description = %Q{does ms lipidomics}
|
14
|
+
gem.email = "jtprince@gmail.com"
|
15
|
+
gem.authors = ["John T. Prince"]
|
16
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
17
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
18
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
19
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
20
|
+
gem.add_development_dependency "rspec", "~> 2.3.0"
|
21
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
22
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
23
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
24
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
25
|
+
gem.add_development_dependency "bundler", "~> 1.0.0"
|
26
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
27
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
28
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
29
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
30
|
+
gem.add_development_dependency "jeweler", "~> 1.6.4"
|
31
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
32
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
33
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
34
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
35
|
+
gem.add_development_dependency "rcov", ">= 0"
|
36
|
+
end
|
37
|
+
Jeweler::RubygemsDotOrgTasks.new
|
38
|
+
|
39
|
+
require 'rspec/core'
|
40
|
+
require 'rspec/core/rake_task'
|
41
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
42
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
43
|
+
end
|
44
|
+
|
45
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
46
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
47
|
+
spec.rcov = true
|
48
|
+
end
|
49
|
+
|
50
|
+
task :default => :spec
|
51
|
+
|
52
|
+
require 'rdoc/task'
|
53
|
+
Rake::RDocTask.new do |rdoc|
|
54
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
55
|
+
|
56
|
+
rdoc.rdoc_dir = 'rdoc'
|
57
|
+
rdoc.title = "ms-lipidomics #{version}"
|
58
|
+
rdoc.rdoc_files.include('README*')
|
59
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'trollop'
|
4
|
+
require 'ms/mzml'
|
5
|
+
require 'ms/lipid/search'
|
6
|
+
require 'ms/lipid/ion'
|
7
|
+
require 'ms/lipid/search/query'
|
8
|
+
require 'ms/lipid_maps'
|
9
|
+
|
10
|
+
# for html output: (just make the id clickable)
|
11
|
+
LIPIDMAPS_SEARCH = "http://www.lipidmaps.org/data/LMSDRecord.php?LMID="
|
12
|
+
|
13
|
+
DEFAULTS = {
|
14
|
+
:bin_width => 5,
|
15
|
+
:bin_unit => :ppm,
|
16
|
+
:search_unit => :ppm,
|
17
|
+
}
|
18
|
+
|
19
|
+
class Sample
|
20
|
+
attr_accessor :file
|
21
|
+
attr_accessor :spectrum
|
22
|
+
def initialize(file, merge_opts={})
|
23
|
+
@file = file
|
24
|
+
@spectrum = merge_ms1_spectra(file, DEFAULTS.merge(merge_opts))
|
25
|
+
end
|
26
|
+
|
27
|
+
# returns a single spectrum object
|
28
|
+
def merge_ms1_spectra(file, opts)
|
29
|
+
spectra = []
|
30
|
+
warn "using number of peaks as proxy for ms level right now"
|
31
|
+
MS::Mzml.foreach(file) do |spectrum|
|
32
|
+
spectra << spectrum if spectrum.mzs.size > 1000 # <<<<<<------ kludge for ms_level == 1
|
33
|
+
end
|
34
|
+
spectra.each {|spectrum| spectrum.sort! }
|
35
|
+
|
36
|
+
MS::Spectrum.merge(spectra, opts)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Suffix appended to each input file's base name to form the output path.
ext = ".lipidID.tsv"

parser = Trollop::Parser.new do
  banner "usage: #{File.basename(__FILE__)} [OPTIONS] <lipidmaps>.tsv <file>.mzML ..."
  text "output: <file>#{ext} ..."
  text ""
  text "note that sometimes you get an error from R like this:"
  text "(`eval': voidEval failed: Packet[cmd=2130771970,len=<nil>, con='<nil>', status=error...)"
  text "just re-run it and it will work"
  text ""
  opt :bin_width, "width of the bins for merging", :default => DEFAULTS[:bin_width]
  opt :bin_unit, "units for binning (ppm or amu)", :default => DEFAULTS[:bin_unit].to_s
  opt :search_unit, "unit for searching nearest hit (ppm or amu)", :default => DEFAULTS[:search_unit].to_s
  opt :top_n_peaks, "the number of highest intensity peaks to query the DB with", :default => 1000
  opt :display_n, "the number of best hits to display", :default => 20
  opt :verbose, "talk about it"
end

# Parse inside Trollop's standard handler so that --help and malformed options
# print a message and exit instead of surfacing as uncaught exceptions.
opts = Trollop::with_standard_exception_handling(parser) { parser.parse(ARGV) }
opts[:bin_unit] = opts[:bin_unit].to_sym
opts[:search_unit] = opts[:search_unit].to_sym

# after option parsing ARGV must still hold the lipidmaps file and >= 1 mzML file
if ARGV.size < 2
  parser.educate
  exit
end

(lipidmaps, *files) = ARGV

$VERBOSE = opts[:verbose]

proton = MS::Lipid::Modification.new(:proton)
h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)

lipids = MS::LipidMaps.parse_file(lipidmaps)

# every lipid is searched as [M+H]+ and as [M+H-H2O]+
ions = lipids.map do |lipid|
  [[proton], [proton, h2o_loss]].map do |mods|
    MS::Lipid::Ion.new(lipid, mods)
  end
end.flatten(1)

searcher = MS::Lipid::Search.new(ions, :ppm => (opts[:search_unit] == :ppm))

files.each do |file|
  base = file.chomp(File.extname(file))
  puts "processing file: #{file}" if $VERBOSE
  sample = Sample.new(file, opts)

  num_points = sample.spectrum.mzs.size
  puts "#{num_points} merged peaks in #{file}" if $VERBOSE

  # keep the top_n_peaks most intense points, then restore point order with sort
  highest_points = sample.spectrum.points.sort_by(&:last).reverse[0,opts[:top_n_peaks]].sort

  sample.spectrum = MS::Spectrum.from_points( highest_points )

  queries = sample.spectrum.mzs.each_with_index.map {|mz,index| MS::Lipid::Search::Query.new(mz, index) }
  hit_groups = searcher.search(queries, :return_order => :sorted)

  hit_info = [:qvalue, :pvalue, :observed_mz, :theoretical_mz, :delta, :ppm]
  second_hit_info = [:ppm]

  output = base + ext
  puts "writing to #{output}" if $VERBOSE
  File.open(output, 'w') do |out|
    out.puts((hit_info + %w(2nd_hit_ppm first_isobar_name num_isobars isobars)).join("\t"))
    hit_groups[0,opts[:display_n]].each do |hit_group|
      tophit = hit_group.first
      # a group may hold a single hit; leave the 2nd-hit columns blank then
      second_hit = hit_group[1]
      first_isobar = tophit.db_isobar_group.first.lipid

      row = hit_info.map {|mthd| tophit.send(mthd) }
      row.concat(second_hit_info.map {|mthd| second_hit && second_hit.send(mthd) })

      common_name = first_isobar.common_name
      common_name = first_isobar.systematic_name if common_name == "-"
      row.push common_name
      row.push tophit.db_isobar_group.size

      # "<LM_ID>:<+/-(formula)>..." for every isobaric ion; named so that it
      # does not overwrite the outer +ions+ local
      isobar_descriptions = tophit.db_isobar_group.map do |ion|
        mod_string = ion.modifications.map do |mod|
          (mod.gain? ? '+' : '-') + "(#{mod.charged_formula})"
        end.join
        [ion.lipid.lm_id, mod_string].join(":")
      end.join(' ')
      row.push isobar_descriptions
      out.puts row.join("\t")
    end
  end
end
|
data/lib/ms/lipid.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
module MS
  # One lipid record; the attributes follow the column order returned by
  # Lipid.members.
  class Lipid
    # @return [Array<Symbol>] attribute names, in constructor-argument order
    def self.members
      [:lm_id,:common_name,:systematic_name,:formula,:mass,:category,:main_class,:sub_class]
    end

    members.each {|mem| attr_accessor mem }

    # Takes the attribute values positionally (same order as Lipid.members).
    # Missing trailing values are left nil; extra values are ignored.
    def initialize(*args)
      (@lm_id,@common_name,@systematic_name,@formula,@mass,@category,@main_class,@sub_class) = args
    end

    # Short description with the common name cut to 20 characters. A nil
    # common name is rendered as an empty string instead of raising.
    def inspect
      shown_name = common_name.to_s
      shown_name = shown_name[0,20] + "..." if shown_name.size > 20
      "<#{lm_id}: #{formula}: #{mass} #{shown_name}>"
    end
  end
end
|
data/lib/ms/lipid/ion.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
module MS
  class Lipid
    # a lipid with modifications (typically the mods give it a charge so that
    # it can be seen in the mass spec)
    class Ion
      # an MS::Lipid object
      attr_accessor :lipid
      # an Array of MS::Lipid::Modification objects
      attr_accessor :modifications

      def initialize(lipid, mods=[])
        @lipid = lipid
        @modifications = mods
        @mz = nil
      end

      # The mass-to-charge ratio: (lipid mass + sum of mod massdiffs) / net charge.
      # Returns nil when the net charge is zero. A non-nil result is cached.
      #
      # The lipid mass is coerced with Float() so that an Integer mass does
      # not trigger integer division and a numeric String mass works.
      def mz
        return @mz if @mz
        charge = @modifications.inject(0) {|sum, mod| sum + mod.charge }
        return nil if charge == 0
        mass = @modifications.inject(Float(@lipid.mass)) {|sum, mod| sum + mod.massdiff }
        @mz = mass / charge
      end

      def inspect
        "<|| Ion mz=#{mz} #{lipid.inspect} + #{modifications.map(&:inspect).join(', ')} ||>"
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'ms/mass'
|
2
|
+
|
3
|
+
module MS
|
4
|
+
class Lipid
|
5
|
+
|
6
|
+
|
7
|
+
# the convention is all mods are gains unless the name ends in an
|
8
|
+
# underscore
|
9
|
+
class Modification
|
10
|
+
|
11
|
+
# given a string with a formula and charge, returns the formula portion
|
12
|
+
# and the charges (as a signed integer)
|
13
|
+
# Splits a charged formula into its formula and its signed charge.
# The charge is the number of trailing '+' (positive) or '-' (negative)
# characters; a string without a trailing sign has charge 0.
#
#   "Li+"  => ["Li", 1]
#   "H2++" => ["H2", 2]
#   "Cl-"  => ["Cl", -1]
#   "H2O"  => ["H2O", 0]
#
# @param [String] string formula with optional trailing charge signs
# @return [Array] [formula String, charge Integer]
def self.formula_and_charge(string)
  md = string.match(/\A(.*?)(\++|-+)?\z/m)
  signs = md[2]
  charge =
    if signs.nil?
      0
    elsif signs[0, 1] == '-'
      -signs.size
    else
      signs.size
    end
  [md[1], charge]
end
|
24
|
+
|
25
|
+
# calculates the mass diff. For every positive charge the mass of an
|
26
|
+
# electron is subtracted; for every negative charge the mass of an
|
27
|
+
# electron is added. If gain is false, then the mass diff will be
|
28
|
+
# negative.
|
29
|
+
# Mass difference contributed by +formula+ carrying +charge+.
# One electron mass is subtracted per positive charge and added per negative
# charge. The sign of the result is flipped when +gain+ is false.
#
# @param formula the chemical formula handed to MS::Mass.formula_to_exact_mass
# @param [Integer] charge signed number of charges
# @param gain false to get the (negated) mass difference of a loss
# @return the signed mass difference
def self.massdiff(formula, charge, gain=true)
  # the exact mass is looked up once (the result of a duplicate first call
  # used to be thrown away)
  diff = MS::Mass.formula_to_exact_mass(formula) - (charge * MS::Mass::ELECTRON)
  gain ? diff : -diff
end
|
36
|
+
|
37
|
+
# the charge on the mod should be represented by the number of plusses
|
38
|
+
# or minuses after the formula (Li+ for a +1 charge Lithium or H2++, 2
|
39
|
+
# protons with a total of 2 charges)
|
40
|
+
FORMULAS = {
|
41
|
+
:proton => 'H',
|
42
|
+
:ammonium => 'NH3H',
|
43
|
+
:lithium => 'Li',
|
44
|
+
:water => 'H2O',
|
45
|
+
}
|
46
|
+
CHARGE = {
|
47
|
+
:proton => 1,
|
48
|
+
:ammonium => 1,
|
49
|
+
:lithium => 1,
|
50
|
+
:water => 0,
|
51
|
+
}
|
52
|
+
|
53
|
+
# determined by running formulas through MS::Mass.massdiff
|
54
|
+
MASSDIFFS = {}
|
55
|
+
FORMULAS.each do |name, formula|
|
56
|
+
MASSDIFFS[name] = self.massdiff(formula, CHARGE[name])
|
57
|
+
end
|
58
|
+
|
59
|
+
# as a symbol
|
60
|
+
attr_accessor :name
|
61
|
+
# as a molecular formula
|
62
|
+
attr_accessor :formula
|
63
|
+
# negative indicates a loss
|
64
|
+
attr_accessor :massdiff
|
65
|
+
# the charge
|
66
|
+
attr_accessor :charge
|
67
|
+
|
68
|
+
# if no mass or formula is given then it searches common mods for the name
|
69
|
+
# @param [Symbol] name the name of the mod
|
70
|
+
# A number of opts are expected if they are not found in the FORMULAS,
|
71
|
+
# CHARGE, or MASSDIFFS hashes:
|
72
|
+
#
|
73
|
+
# attributes:
|
74
|
+
# :formula = the chemical formula, lipidmaps style ("C2H4BrO")
|
75
|
+
# :massdiff = +/-Float
|
76
|
+
# :charge = +/- Integer
|
77
|
+
#
|
78
|
+
# instruction:
|
79
|
+
# :loss = true flips the mass diff sign during initialization
|
80
|
+
# necessary to get negative massdiff on named molecule
|
81
|
+
# (unnecessary if you input massdiff manually)
|
82
|
+
def initialize(name, opts={})
|
83
|
+
@name = name
|
84
|
+
@formula = opts[:formula] || FORMULAS[name]
|
85
|
+
@massdiff = opts[:massdiff] || MASSDIFFS[name]
|
86
|
+
@charge = opts[:charge] || CHARGE[name]
|
87
|
+
# necessary if you are using a named molecule and you want its loss
|
88
|
+
# rather than gain (i.e., you want a negative massdiff)
|
89
|
+
@massdiff = -@massdiff if opts[:loss]
|
90
|
+
end
|
91
|
+
|
92
|
+
# The formula followed by one '+' per positive charge or one '-' per negative
# charge (nothing is appended for a charge of 0).
def charged_formula
  sign = (@charge > 0) ? '+' : '-'
  @formula + (sign * @charge.abs)
end
|
95
|
+
|
96
|
+
def gain?
|
97
|
+
massdiff > 0
|
98
|
+
end
|
99
|
+
|
100
|
+
def loss?
|
101
|
+
!gain?
|
102
|
+
end
|
103
|
+
|
104
|
+
def inspect
|
105
|
+
"<Mod: #{charged_formula}>"
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
|
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'ms/spectrum'
|
2
|
+
require 'rserve/simpler' # TODO: move to integrated interface with rserve when available
|
3
|
+
require 'core_ext/array/in_groups'
|
4
|
+
require 'ms/lipid/search/hit'
|
5
|
+
require 'ms/lipid/search/bin'
|
6
|
+
require 'ms/lipid/modification'
|
7
|
+
require 'ms/lipid/search/probability_distribution'
|
8
|
+
|
9
|
+
module MS
|
10
|
+
class Lipid
|
11
|
+
class Search
|
12
|
+
STANDARD_MODIFICATIONS = {
|
13
|
+
:proton => [1,2],
|
14
|
+
:ammonium => [1],
|
15
|
+
:lithium => [1],
|
16
|
+
:water => [1,2],
|
17
|
+
}
|
18
|
+
STANDARD_SEARCH = {
|
19
|
+
:units => :ppm,
|
20
|
+
:query_min_count_per_bin => 500, # min number of peaks per bin
|
21
|
+
:num_rand_samples_per_bin => 1000,
|
22
|
+
:num_nearest => 2,
|
23
|
+
:return_order => :as_given, # or :sorted
|
24
|
+
}
|
25
|
+
|
26
|
+
attr_accessor :options
|
27
|
+
attr_accessor :search_function
|
28
|
+
|
29
|
+
# will generate PossibleLipid objects and return a new search object
|
30
|
+
# uses only one kind of loss at a time and one type of gain at a time
|
31
|
+
# will also do the combination of a gain and a loss if gain_and_loss is
|
32
|
+
# true
|
33
|
+
# Builds a search object from every lipid combined with every modification
# in +mods+, one ion per (lipid, modification, count).
#
# @param lipids an enumerable of MS::Lipid objects
# @param [Hash] mods modification name => Array of counts; each count n
#   yields an ion carrying n copies of that modification
# @param gain_and_loss combining a gain with a loss is not implemented;
#   passing true raises NotImplementedError
# @return a new search object built from the generated ions
def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
  if gain_and_loss
    raise NotImplementedError, "combining a gain with a loss is not implemented yet"
  end
  # lazily loaded: this method is the only user of Ion in this file
  require 'ms/lipid/ion' unless defined?(MS::Lipid::Ion)

  real_mods_and_cnts = mods.map {|name, cnts| [MS::Lipid::Modification.new(name), cnts] }
  ions = lipids.flat_map do |lipid|
    real_mods_and_cnts.flat_map do |mod, counts|
      counts.map {|cnt| MS::Lipid::Ion.new(lipid, Array.new(cnt, mod)) }
    end
  end
  # an ion with no net charge has a nil m/z and cannot be searched for
  self.new(ions.reject {|ion| ion.mz.nil? })
end
|
54
|
+
|
55
|
+
# ions are MS::Lipid::Ion objects
|
56
|
+
# each one should give a non-nil m/z value
|
57
|
+
def initialize(ions=[], opts={})
|
58
|
+
@options = STANDARD_SEARCH.merge(opts)
|
59
|
+
@db_isobar_spectrum = create_db_isobar_spectrum(ions)
|
60
|
+
@search_function = create_search_function(ions, @options)
|
61
|
+
end
|
62
|
+
|
63
|
+
# returns an array of HitGroup and a parallel array of BH derived
|
64
|
+
# q-values (will switch to Storey soon enough). The HitGroups are
|
65
|
+
# returned in the order in which the mz_values are given.
|
66
|
+
# assumes search_queries are in ascending m/z order
|
67
|
+
# Runs the search function on +search_queries+ and assigns q-values.
#
# +opts+ is merged over the options given at construction; the merged value
# of :return_order selects the result:
#   :as_given - the hit groups as returned by the search function
#   :sorted   - the hit groups as ordered by qvalues!
# Any other value raises ArgumentError.
def search(search_queries, opts={})
  opt = @options.merge( opts )
  hit_groups = @search_function.call(search_queries, opt[:num_nearest])
  sorted_hit_groups = qvalues!(hit_groups, opt)
  # use the merged options so that the configured default is honored when the
  # caller does not pass :return_order
  case opt[:return_order]
  when :as_given
    hit_groups
  when :sorted
    sorted_hit_groups
  else
    raise ArgumentError, "unknown :return_order #{opt[:return_order].inspect} (expected :as_given or :sorted)"
  end
end
|
78
|
+
|
79
|
+
# Assigns a Benjamini-Hochberg style q-value to the first (top) hit of every
# hit group and returns the hit groups ordered by ascending
# [pvalue, |delta|, |ppm|, original index].
#
# from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
#
# @param hit_groups objects responding to pvalue, delta, ppm and first
# @param opts not used by this method
# @raise [RuntimeError] if any p-value is NaN
def qvalues!(hit_groups, opts)
  num_total_tests = hit_groups.size

  # the original index keeps the sort from ever comparing hit groups
  tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

  if tuples.any? {|tuple| tuple.first.respond_to?(:nan?) && tuple.first.nan? }
    raise "pvalue of NaN!\n>>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
  end

  prev_bh_value = 0
  tuples.sort.each_with_index.map do |tuple, rank|
    bh_value = tuple.first * num_total_tests / (rank + 1)
    # the correction can exceed 1, so cap it there
    bh_value = [bh_value, 1].min
    # preserve monotonicity: never yield a value below the previous one
    bh_value = [bh_value, prev_bh_value].max
    prev_bh_value = bh_value
    hit_group = tuple.last
    hit_group.first.qvalue = bh_value # give the top hit the q-value
    hit_group
  end
end
|
117
|
+
|
118
|
+
# Builds the search bins and their probability distributions for +ions+ and
# returns a lambda (search_queries, num_nearest_hits) -> Array of hit groups.
#
# Options read from +opt+: :query_min_count_per_bin,
# :num_rand_samples_per_bin, :ppm and :num_nearest (the lambda's default
# number of nearest hits).
#
# NOTE(review): the unit is taken from opt[:ppm]; when that key is absent nil
# is passed on and the distributions are built in amu, regardless of the
# :units option -- confirm this is intended.
def create_search_function(ions, opt)
  db_isobar_spectrum = create_db_isobar_spectrum(ions)
  search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])
  create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], opt[:ppm])

  # the actual search function
  lambda do |search_queries, num_nearest_hits=opt[:num_nearest]|
    Bin.bin(search_bins, search_queries, &:mz)
    # honor the number of hits requested for this call rather than the value
    # captured at construction
    search_bins.reject {|bin| bin.data.empty? }.map {|bin| bin.queries_to_hit_groups!(num_nearest_hits) }.flatten(1)
  end
end
|
134
|
+
|
135
|
+
#####################################################
|
136
|
+
# Ancillary to create_search_function:
|
137
|
+
#####################################################
|
138
|
+
|
139
|
+
# returns a DB isobar spectrum where the m/z values are all the m/z
|
140
|
+
# values to search for and the intensities each an array corresponding
|
141
|
+
# to all the lipid ions matching that m/z value
|
142
|
+
def create_db_isobar_spectrum(ions)
|
143
|
+
mzs = [] ; query_groups = []
|
144
|
+
pairs = ions.group_by(&:mz).sort_by(&:first)
|
145
|
+
pairs.each {|mz, ar| mzs << mz ; query_groups << ar }
|
146
|
+
MS::Spectrum.new([mzs, query_groups])
|
147
|
+
end
|
148
|
+
|
149
|
+
# use_ppm uses ppm or amu if false
|
150
|
+
# returns the search_bins
|
151
|
+
def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
|
152
|
+
search_bins.each do |search_bin|
|
153
|
+
rng = Random.new
|
154
|
+
random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range) }
|
155
|
+
# find the deltas
|
156
|
+
diffs = random_mzs.map do |random_mz|
|
157
|
+
nearest_random_mz = db_isobar_spectrum.find_nearest(random_mz)
|
158
|
+
delta = (random_mz - nearest_random_mz).abs
|
159
|
+
use_ppm ? delta./(nearest_random_mz).*(1e6) : delta
|
160
|
+
end
|
161
|
+
search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs)
|
162
|
+
end
|
163
|
+
search_bins
|
164
|
+
end
|
165
|
+
|
166
|
+
# Partitions the DB isobar spectrum into consecutive m/z bins.
#
# The number of bins is the largest count (at least 1) for which the average
# number of points per bin is still >= +min_n_per_bin+. Every bin but the
# last is end-exclusive and ends where the next one starts; the last bin is
# inclusive and ends at the last m/z.
#
# @return [Array] MS::Lipid::Search::Bin objects
# @raise [RuntimeError] if the spectrum has no points
def create_search_bins(db_isobar_spectrum, min_n_per_bin)
  num_mzs = db_isobar_spectrum.mzs.size
  # checked up front: grouping an empty spectrum would otherwise fail later
  # with an unhelpful error on nil
  raise 'I think you need some data in your query spectrum!' if num_mzs == 0

  optimal_num_groups = (1..num_mzs).take_while {|divisions| (num_mzs.to_f / divisions) >= min_n_per_bin }.last || 1

  groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a
  raise 'I think you need some data in your query spectrum!' if groups.empty?

  search_bins = groups.each_cons(2).map do |points, next_points|
    MS::Lipid::Search::Bin.new( Range.new(points.first.first, next_points.first.first, true), db_isobar_spectrum )
  end
  last_points = groups.last
  search_bins << MS::Lipid::Search::Bin.new( Range.new(last_points.first.first, last_points.last.first), db_isobar_spectrum ) # inclusive
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'bin'
|
2
|
+
|
3
|
+
module MS
|
4
|
+
class Lipid
|
5
|
+
class Search
|
6
|
+
|
7
|
+
# A Search::Bin is a range that contains the *entire* query spectrum
|
8
|
+
# (not just the portion covered by the range). the query spectrum, and
|
9
|
+
# a ProbabilityDistribution -- the probability that a peak's delta to
|
10
|
+
# nearest peak is that small by chance.
|
11
|
+
class Bin < ::Bin
|
12
|
+
# the intensity value of the query spectrum should be a query
|
13
|
+
attr_accessor :db_spectrum
|
14
|
+
attr_accessor :probability_distribution
|
15
|
+
|
16
|
+
def initialize(range_obj, db_spectrum)
|
17
|
+
super(range_obj.begin, range_obj.end, range_obj.exclude_end?)
|
18
|
+
@db_spectrum = db_spectrum
|
19
|
+
end
|
20
|
+
|
21
|
+
def <<(query)
|
22
|
+
@data << query
|
23
|
+
end
|
24
|
+
|
25
|
+
# returns the nearest num_hits MS::Lipid::Search::Hits sorted by delta
|
26
|
+
# [with tie going to the lower m/z]
|
27
|
+
# searches all queries and removes them from the data queue
|
28
|
+
def queries_to_hit_groups!(num_hits=1)
|
29
|
+
queries = @data.dup
|
30
|
+
@data.clear
|
31
|
+
|
32
|
+
@db_isobar_groups_by_index = @db_spectrum.intensities
|
33
|
+
|
34
|
+
hit_groups = queries.map do |query|
|
35
|
+
best_hits(query, num_hits)
|
36
|
+
end
|
37
|
+
|
38
|
+
all_top_hits = hit_groups.map(&:first)
|
39
|
+
|
40
|
+
# updates the pvalues for all the hits
|
41
|
+
pvalues = probability_distribution.pvalues( all_top_hits )
|
42
|
+
all_top_hits.zip(pvalues) {|hit, pvalue| hit.pvalue = pvalue }
|
43
|
+
|
44
|
+
hit_groups
|
45
|
+
end
|
46
|
+
|
47
|
+
# returns a HitGroup object
|
48
|
+
# Returns a HitGroup with up to +num_hits+ hits for +query+, ordered by
# ascending |query m/z - DB m/z| (ties go to the lower index).
#
# Candidates are the DB entries within (num_hits - 1) positions of the
# nearest DB m/z, clamped to the bounds of the DB spectrum.
def best_hits(query, num_hits)
  query_mz = query.mz
  db_mzs = @db_spectrum.mzs
  isobar_groups = @db_isobar_groups_by_index || @db_spectrum.intensities

  nearest = @db_spectrum.find_nearest_index(query_mz)
  lowest = [nearest - (num_hits-1), 0].max
  # clamp to the last valid index
  highest = [nearest + (num_hits-1), db_mzs.size - 1].min

  delta_index_pairs = (lowest..highest).map {|i| [(query_mz - db_mzs[i]).abs, i] }
  hits = delta_index_pairs.sort.first(num_hits).map do |_delta, i|
    Hit.new( :db_isobar_group => isobar_groups[i], :observed_mz => query_mz)
  end
  HitGroup.new(hits)
end
|
66
|
+
|
67
|
+
def inspect
|
68
|
+
"<(#{super}) @db_spectrum(points size)=#{db_spectrum.mzs.size} @probability_distribution=#{probability_distribution}>"
|
69
|
+
end
|
70
|
+
|
71
|
+
def to_range
|
72
|
+
Range.new( self.begin, self.end, self.exclude_end? )
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module MS
|
3
|
+
class Lipid
|
4
|
+
class Search
|
5
|
+
# this is a group of Lipid::Ion objects that all have the same (or
|
6
|
+
# possibly similar) m/z
|
7
|
+
class DBIsobarGroup < Array
|
8
|
+
# it is implemented like this so that the isobar group *could* have
|
9
|
+
# individuals in it with slightly different m/z values and this could
|
10
|
+
# still be used as a container. In my current implementation they
|
11
|
+
# have exactly the same m/z
|
12
|
+
attr_accessor :mz
|
13
|
+
def initialize( ar=[], mz=nil)
|
14
|
+
@mz = mz if mz
|
15
|
+
self.replace(ar)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
|
2
|
+
module MS
|
3
|
+
class Lipid
|
4
|
+
class Search
|
5
|
+
class Hit
|
6
|
+
# the db_isobar_group this hit is associated with. Each hit is only
|
7
|
+
# associated with a single db_isobar_group!
|
8
|
+
attr_accessor :db_isobar_group
|
9
|
+
# the experimental m/z value
|
10
|
+
attr_accessor :observed_mz
|
11
|
+
# the probability the hit is due to random chance
|
12
|
+
attr_accessor :pvalue
|
13
|
+
# the FDR if the threshold accepts this pvalue. Note that this value
|
14
|
+
# is relative to the number of tests performed and not completely
|
15
|
+
# intrinsic to the hit itself.
|
16
|
+
attr_accessor :qvalue
|
17
|
+
|
18
|
+
# the probability distribution that can be used to determine its
|
19
|
+
# pvalue
|
20
|
+
attr_accessor :probability_distribution
|
21
|
+
|
22
|
+
def initialize(hash={})
|
23
|
+
hash.each {|k,v| instance_variable_set("@#{k}", v) }
|
24
|
+
end
|
25
|
+
|
26
|
+
# observed_mz - query m/z
|
27
|
+
def delta
|
28
|
+
@observed_mz - @db_isobar_group.first.mz.to_f
|
29
|
+
end
|
30
|
+
|
31
|
+
alias_method :amu, :delta
|
32
|
+
|
33
|
+
# the absolute value of distance from true val
|
34
|
+
def delta_abs
|
35
|
+
delta.abs
|
36
|
+
end
|
37
|
+
|
38
|
+
# parts per million (divided by theoretical m/z)
|
39
|
+
def ppm
|
40
|
+
(delta / @db_isobar_group.first.mz) * 1e6
|
41
|
+
end
|
42
|
+
|
43
|
+
def theoretical_mz
|
44
|
+
@db_isobar_group.first.mz
|
45
|
+
end
|
46
|
+
|
47
|
+
def inspect
|
48
|
+
"<<#{super} -- <ppm=#{ppm} delta=#{delta} theoretical_mz=#{theoretical_mz}>>"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# A query that matched multiple items. Each search returns a hit group
|
53
|
+
# which consists of the best hits for that experimental m/z. When
|
54
|
+
# queried for values like delta or ppm, it will delegate to the first hit.
|
55
|
+
# So, in many ways it can be used as a container for hits, but it puts
|
56
|
+
# its best face forward.
|
57
|
+
# An Array of hits for one query that answers delta, ppm, theoretical_mz,
# observed_mz, pvalue and qvalue by delegating to its first hit.
class HitGroup < Array

  # should implement with delegator obviously...
  # should allow setting ???

  def delta() first.delta end
  def ppm() first.ppm end
  def theoretical_mz() first.theoretical_mz end
  # NOTE(review): delegates to a query_group method on the first hit; confirm
  # the hit class actually provides it
  def query_group() first.query_group end
  def observed_mz() first.observed_mz end
  def pvalue() first.pvalue end
  # the q-value of the first hit
  def qvalue() first.qvalue end

  def best_hit() first end

end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
|
2
|
+
module MS
|
3
|
+
class Lipid
|
4
|
+
class Search
|
5
|
+
class ProbabilityDistribution
|
6
|
+
DEFAULT_TYPE = :ppm
|
7
|
+
R = Rserve::Simpler.new
|
8
|
+
|
9
|
+
# takes location, scale and shape parameters
|
10
|
+
attr_accessor :location, :scale, :shape
|
11
|
+
# type names the hit method that supplies the deviation (:ppm or :amu)
|
12
|
+
attr_accessor :type
|
13
|
+
def initialize(location, scale, shape, type=DEFAULT_TYPE)
|
14
|
+
@location, @scale, @shape = location, scale, shape
|
15
|
+
@type = type
|
16
|
+
end
|
17
|
+
|
18
|
+
# takes a deviation and returns the pvalue
|
19
|
+
# Sends a single pgev() call to R for one hit and returns R's reply.
# The deviation is hit.send(type); its absolute value is used, as in
# #pvalues, so that a negative deviation is not passed to log().
def pvalue(hit)
  deviation = hit.send(type).abs
  R.converse "pgev(log(#{deviation}), #{@location}, #{@scale}, #{@shape})"
end
|
22
|
+
|
23
|
+
# same as pvalue, just tries to limit the number of calls to R to
|
24
|
+
# speed things up!
|
25
|
+
def pvalues(hits)
|
26
|
+
deltas = hits.map {|v| v.send(type).abs }
|
27
|
+
R.converse("sapply(r_devs, function(elt) pgev(log(elt), #{@location}, #{@scale}, #{@shape}))", :r_devs => deltas)
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.require_r_library(lib)
|
31
|
+
reply = R.converse "library(#{lib})"
|
32
|
+
unless reply.size > 4 # ~roughly
|
33
|
+
$stderr.puts "The libraries ismev and evd must be installed in your R env!"
|
34
|
+
$stderr.puts "From within R (works best if R is started with sudo or root for installing):"
|
35
|
+
$stderr.puts %Q{install.packages("ismev") ; install.packages("evd")}
|
36
|
+
raise "must have R (rserve) and ismev and evd installed!"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# returns an EVD object
|
41
|
+
def self.deviations_to_probability_distribution(type, devs)
|
42
|
+
%w(ismev evd).each {|lib| require_r_library(lib) }
|
43
|
+
params = R.converse("m <- gev.fit(log(devs_r))\n c(m$mle[1], m$mle[2], m$mle[3])", :devs_r => devs )
|
44
|
+
self.new(*params, type)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
module MS
  class Lipid
    class Search
      # Pairs an observed m/z with the index of the spectrum it came from.
      class Query
        # mz:: the experimentally observed lowest mz
        # index:: the index of search spectrum that the m/z was derived from;
        #         this allows for the creation of an isotope envelope starting
        #         from a particular m/z value.
        attr_accessor :mz, :index

        def initialize(mz, index)
          @mz = mz
          @index = index
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'ms/lipid'
|
2
|
+
require 'ms/mass'
|
3
|
+
|
4
|
+
module MS
  module LipidMaps
    # Reads a tab-separated LipidMaps file and returns an array of Lipids.
    #
    # Rows whose fourth column (the formula) contains no capital letter are
    # dropped.  The first remaining row is treated as the column header: it is
    # compared against MS::Lipid.members (a warning is issued on mismatch) and
    # never becomes a Lipid.
    #
    # if high_res_mass is true (default), then the formula is used to
    # calculate a higher resolution mass than what is in lipidmaps.
    # skip_clas_defs is accepted but not used by this method.
    def self.parse_file(lipidmaps_tsv, high_res_mass=true, skip_clas_defs=true)
      header_consumed = false
      lipids = []
      IO.foreach(lipidmaps_tsv) do |row|
        row.chomp!
        fields = row.split("\t")
        next if fields[3] !~ /[A-Z]/  # <- there is no formula!

        unless header_consumed
          header_consumed = true
          warn "lipidmaps column headers are not right!" unless fields.map(&:downcase) == MS::Lipid.members.map(&:to_s)
          next
        end

        fields[4] = MS::Mass.formula_to_exact_mass(fields[3]) if high_res_mass
        lipids << MS::Lipid.new(*fields)
      end
      lipids
    end
  end
end
|
30
|
+
|
31
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
require 'ms/lipid'
|
5
|
+
require 'ms/lipid/modification'
|
6
|
+
require 'ms/lipid/ion'
|
7
|
+
|
8
|
+
describe MS::Lipid::Ion do
  # Builds two ions from one lipid of mass 300.2: one with a single proton
  # plus a water loss, one with two protons plus a water loss.
  before do
    lipid = MS::Lipid.new
    lipid.mass = 300.2
    proton = MS::Lipid::Modification.new(:proton)
    h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)
    @plus1 = MS::Lipid::Ion.new(lipid, [proton, h2o_loss])
    @plus2 = MS::Lipid::Ion.new(lipid, [proton, proton, h2o_loss])
  end

  it 'calculates the correct m/z' do
    # The tolerance used to be 1e5 (one hundred thousand), which accepts any
    # plausible m/z and therefore tested nothing; 1e-5 is a real bound.
    @plus1.mz.should be_within(1e-5).of(283.196711735)
    @plus2.mz.should be_within(1e-5).of(142.101994085)
  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid/modification'
|
4
|
+
|
5
|
+
describe MS::Lipid::Modification do
  # Short alias used throughout the examples below.
  Mod = MS::Lipid::Modification

  it 'can create common mods easily' do
    # water loss
    water_loss = Mod.new(:water, :loss => true)
    water_loss.loss?.should be_true
    water_loss.massdiff.<(0).should be_true
    water_loss.charge.should == 0
    water_loss.charged_formula.should == 'H2O'

    # proton gain
    prot = Mod.new(:proton)
    prot.gain?.should be_true
    prot.massdiff.>(0).should be_true
    prot.charge.should == 1
    prot.charged_formula.should == 'H+'

    # ammonium gain
    ammon = Mod.new(:ammonium)
    ammon.gain?.should be_true
    ammon.massdiff.>(0).should be_true
    ammon.charge.should == 1
    ammon.charged_formula.should == 'NH3H+'
  end

  it 'can create custom mods' do
    # an unknown name starts out with nothing filled in
    mymod = Mod.new(:super_snazzy)
    mymod.formula.should be_nil
    mymod.massdiff.should be_nil
    mymod.charge.should be_nil

    mymod.formula = 'CH4'
    mymod.charge = 2
    mymod.massdiff = MS::Lipid::Modification.massdiff(mymod.formula, mymod.charge)
    # The tolerance used to be 1e4 (ten thousand), which can never fail for a
    # mass near 16; 1e-4 actually checks the computed value.
    mymod.massdiff.should be_within(1e-4).of(16.030202)
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid_maps'
|
4
|
+
require 'ms/lipid/search'
|
5
|
+
require 'ms/lipid/search/query'
|
6
|
+
require 'ms/lipid/modification'
|
7
|
+
|
8
|
+
describe MS::Lipid::Search do
  # Modifications shared by both contexts below.
  before do
    @proton = MS::Lipid::Modification.new(:proton)
    @h2o_loss = MS::Lipid::Modification.new(:water, :loss => true)
  end
  describe 'searching a section of lipid maps' do
    before do
      @lipids = MS::LipidMaps.parse_file(TESTFILES + '/lipidmaps_short.tsv')
      # two ions per lipid: [M+H]+ and [M+H-H2O]+
      @ions = @lipids.map do |lipid|
        [[@proton], [@proton, @h2o_loss]].map do |mods|
          MS::Lipid::Ion.new(lipid, mods)
        end
      end.flatten(1)
      @samples = Hash[ {
        :sample1 => [[187.1633, 244.22, 616.51, 717.50],
                     [100, 200, 100, 200]],
        :sample2 => [[187.164, 396.15, 244.24, 347.28, 618.502],
                     [110, 210, 110, 210, 110]],
        :sample3 => [[187.160, 396.28, 244.24, 347.263, 618.511],
                     [120, 220, 120, 220, 120]],
        :sample4 => [[187.157, 396.20, 244.30, 618.22, 933.01],
                     [30, 33, 38, 99, 22]],
      }.map {|key,data| [key, MS::Spectrum.new(data)] } ]
      @pretend_search_mzs = [187.157, 396.20, 244.30, 618.22, 933.01]
    end

    xit 'creates a query search spectrum' do
      #spec = .create_query_search_spectrum(@ions)
      #spec.mzs.any? {|mz| mz.nil? }.should be_false
      #spec.mzs.size.should == 56
      #spec.intensities.map(&:size).count(2).should == 4
      #spec.intensities.map(&:size).count(1).should == 52
    end

    xit 'creates a probability function' do
      #subject.create_search_function(@ions, :prob_min_bincnt => 20)
    end

    xit 'searches mz values' do
      searcher = MS::Lipid::Search.new(@ions, :query_min_count_per_bin => 8, :num_rand_samples_per_bin => 1000, :ppm => false)
      num_nearest_hits = 3
      # pass the named local instead of repeating the literal
      (hit_groups, qvals) = searcher.search(@pretend_search_mzs, num_nearest_hits)
      p hit_groups.map(&:first).map(&:pvalue)
    end
  end

  describe 'searching a full lipid maps' do

    before do
      # this will be specific to your install since it's not part of install
      path_to_lipidmaps_db = "#{ENV['HOME']}/tmp/tamil/lipidmaps_20120103_classes_1_2_3_4_5_6_7_8.exact_mass.tsv"
      @lipids = MS::LipidMaps.parse_file(path_to_lipidmaps_db)
      # Build ions exactly as the context above does.  This used to call
      # MS::Lipid::Search::Query.new(lipid, mods), but Query#initialize takes
      # (mz, index), so a lipid ended up stored as the m/z.
      # NOTE(review): assumes MS::Lipid::Search.new expects Ion objects here,
      # as in the other context -- confirm against search.rb.
      @ions = @lipids.map do |lipid|
        [[@proton], [@proton, @proton], [@proton, @h2o_loss]].map do |mods|
          MS::Lipid::Ion.new(lipid, mods)
        end
      end.flatten(1)
      @pretend_search_mzs = [187.157, 396.20, 244.30, 618.22, 933.01]
    end

    it 'returns hit groups parallel with input m/zs' do
      searcher = MS::Lipid::Search.new(@ions, :query_min_count_per_bin => 1000, :ppm => false)
      hit_groups = searcher.search(@pretend_search_mzs, 3)
      best_hits = hit_groups.map(&:best_hit)
      best_hits.map {|hit| hit.observed_mz }.should == @pretend_search_mzs
    end

    it 'works with :ppm => true'

  end

end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid_maps'
|
4
|
+
|
5
|
+
describe MS::LipidMaps do

  # path to the short LipidMaps fixture shipped with the specs
  let(:tfile) { TESTFILES + '/lipidmaps_short.tsv' }

  it 'parses lipid maps files' do
    parsed = MS::LipidMaps.parse_file(tfile)
    parsed.size.should == 30 # one is rejected for no formula
    final = parsed.last
    final.sub_class.should == 'Isoflavonoids [PK1205]'
    final.lm_id.should == "LMPK12050388"
  end
end
|
19
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'ms/lipid'
|
4
|
+
|
5
|
+
describe MS::Lipid do

  # seven positional values; no value is supplied for sub_class
  let(:data) do
    ['LMFA00000007', 'n-decanohydroxamic acid', 'N-hydroxydecanamide', 'C10H21NO2', 187.16, 'Fatty Acyls [FA]', 'Other Fatty Acyls [FA00]']
  end

  it 'can be initialized with an array' do
    built = MS::Lipid.new(*data)
    built.mass.should == data[4]
    built.sub_class.should be_nil
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
# Requires supporting files with custom matchers and macros, etc,
|
6
|
+
# in ./support/ and its subdirectories.
|
7
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
8
|
+
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
# Absolute path of the testfiles directory that sits next to this file.
TESTFILES = File.expand_path("testfiles", File.dirname(__FILE__))
|
@@ -0,0 +1,32 @@
|
|
1
|
+
LM_ID COMMON_NAME SYSTEMATIC_NAME FORMULA MASS CATEGORY MAIN_CLASS SUB_CLASS
|
2
|
+
LMFA00000007 n-decanohydroxamic acid N-hydroxydecanamide C10H21NO2 187.16 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
3
|
+
LMFA00000008 (9S,10S)-10-hydroxy-9-(phosphonooxy)octadecanoic acid (9S,10S)-10-hydroxy-9-(phosphonooxy)octadecanoic acid C18H37O7P 396.23 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
4
|
+
LMFA00000009 N-(6-aminohexanoyl)-6-aminohexanoic acid 6-(6-aminohexanamido)hexanoic acid C12H24N2O3 244.18 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
5
|
+
LMFA00000014 Virodhamine 2-aminoethyl-5Z,8Z,11Z,14Z-eicosatetraenoate C22H37NO2 347.28 Fatty Acyls [FA] Other Fatty Acyls [FA00]
|
6
|
+
MGL02010062 DG(16:0/20:3(8Z,11Z,14Z)/0:0)[iso2] 1-hexadecanoyl-2-(8Z,11Z,14Z-eicosatrienoyl)-sn-glycerol C39H70O5 618.52 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
7
|
+
LMGL02010063 DG(18:2(9Z,12Z)/18:2(9Z,12Z)/0:0) 1,2-di-(9Z,12Z-octadecadienoyl)-sn-glycerol C39H68O5 616.51 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
8
|
+
LMGL02010064 DG(18:1(9Z)/18:3(9Z,12Z,15Z)/0:0)[iso2] 1-(9Z-octadecenoyl)-2-(9Z,12Z,15Z-octadecatrienoyl)-sn-glycerol C39H68O5 616.51 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
9
|
+
LMGL02010065 DG(18:0/19:0/0:0)[iso2] 1-octadecanoyl-2-nonadecanoyl-sn-glycerol C40H78O5 638.58 Glycerolipids [GL] Diradylglycerols [GL02] Diacylglycerols [GL0201]
|
10
|
+
MGP01011596 PC(18:1(9Z)/13:0) 1-(9Z-octadecenoyl)-2-tridecanoyl-glycero-3-phosphocholine C39H76NO8P 717.53 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
11
|
+
LMGP01011597 PC(18:1(9Z)/14:1(9Z)) 1-(9Z-octadecenoyl)-2-(9Z-tetradecenoyl)-glycero-3-phosphocholine C40H76NO8P 729.53 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
12
|
+
LMGP01011598 PC(18:1(9Z)/15:0) 1-(9Z-octadecenoyl)-2-pentadecanoyl-glycero-3-phosphocholine C41H80NO8P 745.56 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
13
|
+
LMGP01011599 PC(18:1(9Z)/15:1(9Z)) 1-(9Z-octadecenoyl)-2-(9Z-pentadecenoyl)-glycero-3-phosphocholine C41H78NO8P 743.55 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
14
|
+
LMGP01011600 PC(18:1(9Z)/17:0) 1-(9Z-octadecenoyl)-2-heptadecanoyl-glycero-3-phosphocholine C43H84NO8P 773.59 Glycerophospholipids [GP] Glycerophosphocholines [GP01] Diacylglycerophosphocholines [GP0101]
|
15
|
+
MSP0502AN07 - Galalpha1-3Galalpha1-3Galalpha1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/24:1(15Z)) C78H141NO33 1619.94 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
16
|
+
LMSP0502AN08 - Galalpha1-3Galalpha1-3Galalpha1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/26:1(17Z)) C80H145NO33 1647.97 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
17
|
+
LMSP0502AO00 - Galbeta1-3Galalpha1-4Galbeta1-4Glcbeta-Cer - - Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
18
|
+
LMSP0502AO01 - Galbeta1-3Galalpha1-4Galbeta1-4Glcbeta-Cer(d18:1/16:0) C58H107NO23 1185.72 Sphingolipids [SP] Neutral glycosphingolipids [SP05] GalNAcbeta1-3Galalpha1-4Galbeta1-4Glc- (Globo series) [SP0502]
|
19
|
+
MST02020021 Dehydroepiandrosterone 3beta-hydroxyandrost-5-en-17-one C19H28O2 288.21 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
20
|
+
LMST02020023 Epiandrosterone 3beta-hydroxy-androstan-17-one C19H30O2 290.22 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
21
|
+
LMST02020025 Fluoxymesterone 9alpha-fluoro-11beta,17beta-dihydroxy-17-methylandrost-4-en-3-one C20H29FO3 336.21 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
22
|
+
LMST02020026 Formyldienolone 11alpha,17beta-dihydroxy-17-methyl-3-oxoandrosta-1,4-diene-2-carboxaldehyde C21H28O4 344.20 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
23
|
+
LMST02020027 Mestanolone 17beta-hydroxy-17-methyl-androstan-3-one C20H32O2 304.24 Sterol Lipids [ST] Steroids [ST02] C19 steroids (androgens) and derivatives [ST0202]
|
24
|
+
MPR03010008 Decaprenol 3,7,11,15,19,23,27,31,35,39-decamethyltetraconta-2Z,6Z,10Z,14Z,18Z,22Z,26Z,30Z,34E,38-decaen-1-ol C50H82O 698.64 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenols [PR0301]
|
25
|
+
LMPR03020001 undecaprenyl phosphate 3,7,11,15,19,23,27,31,35,39,43-undecamethyltetratetraconta-2Z,6Z,10Z,14Z,18Z,22Z,26Z,30Z,34E,38E,42-undecaen-1-yl phosphate C55H91O4P 846.67 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
26
|
+
LMPR03020002 Undecaprenyl phosphate mannose Alpha-mannosyl-undecaprenyl phosphate C61H101O9P 1008.72 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
27
|
+
LMPR03020004 Dodecaprenyl phosphate-galacturonic acid - C66H107O10P 1090.76 Prenol Lipids [PR] Polyprenols [PR03] Bactoprenol monophosphates [PR0302]
|
28
|
+
MPK12050384 Irisolidone 5,7-Dihydroxy-6,4'-dimethoxyisoflavone C17H14O6 314.08 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
29
|
+
LMPK12050385 Tectorigenin 5,7,4'-Trihydroxy-6-methoxyisoflavone C16H12O6 300.06 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
30
|
+
LMPK12050386 6-Hydroxygenistein - C15H10O6 286.05 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
31
|
+
LMPK12050387 Irisolone 4'-Hydroxy-5-methoxy-6,7-methylenedioxyisoflavone C17H12O6 312.06 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
32
|
+
LMPK12050388 7-O-Methyltectorigenin 5,4'-Dihydroxy-6,7-dimethoxyisoflavone C17H14O6 314.08 Polyketides [PK] Flavonoids [PK12] Isoflavonoids [PK1205]
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ms-lipidomics
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- John T. Prince
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &7075680 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.3.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *7075680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: bundler
|
27
|
+
requirement: &7075180 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.0.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *7075180
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: jeweler
|
38
|
+
requirement: &7074680 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.6.4
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *7074680
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rcov
|
49
|
+
requirement: &7074200 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *7074200
|
58
|
+
description: does ms lipidomics
|
59
|
+
email: jtprince@gmail.com
|
60
|
+
executables:
|
61
|
+
- lipidomic-search.rb
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files:
|
64
|
+
- LICENSE
|
65
|
+
- README.rdoc
|
66
|
+
files:
|
67
|
+
- .document
|
68
|
+
- .rspec
|
69
|
+
- LICENSE
|
70
|
+
- README.rdoc
|
71
|
+
- Rakefile
|
72
|
+
- bin/lipidomic-search.rb
|
73
|
+
- lib/ms/lipid.rb
|
74
|
+
- lib/ms/lipid/ion.rb
|
75
|
+
- lib/ms/lipid/modification.rb
|
76
|
+
- lib/ms/lipid/search.rb
|
77
|
+
- lib/ms/lipid/search/bin.rb
|
78
|
+
- lib/ms/lipid/search/db_isobar_group.rb
|
79
|
+
- lib/ms/lipid/search/hit.rb
|
80
|
+
- lib/ms/lipid/search/probability_distribution.rb
|
81
|
+
- lib/ms/lipid/search/query.rb
|
82
|
+
- lib/ms/lipid_maps.rb
|
83
|
+
- spec/ms/lipid/ion_spec.rb
|
84
|
+
- spec/ms/lipid/modification_spec.rb
|
85
|
+
- spec/ms/lipid/search_spec.rb
|
86
|
+
- spec/ms/lipid_maps_spec.rb
|
87
|
+
- spec/ms/lipid_spec.rb
|
88
|
+
- spec/spec_helper.rb
|
89
|
+
- spec/testfiles/lipidmaps_short.tsv
|
90
|
+
homepage: http://github.com/princelab/ms-lipidomics
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
none: false
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
105
|
+
requirements:
|
106
|
+
- - ! '>='
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 1.8.15
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: mass spectrometry based lipidomics - especially shotgun lipidomics
|
115
|
+
test_files: []
|