mspire-lipid 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitignore +53 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +11 -0
- data/Rakefile +24 -0
- data/bin/lipidomic-search.rb +203 -0
- data/lib/mspire/lipid.rb +19 -0
- data/lib/mspire/lipid/ion.rb +71 -0
- data/lib/mspire/lipid/ion/fragment.rb +68 -0
- data/lib/mspire/lipid/modification.rb +120 -0
- data/lib/mspire/lipid/search.rb +205 -0
- data/lib/mspire/lipid/search/bin.rb +79 -0
- data/lib/mspire/lipid/search/db_isobar_group.rb +20 -0
- data/lib/mspire/lipid/search/hit.rb +79 -0
- data/lib/mspire/lipid/search/probability_distribution.rb +50 -0
- data/lib/mspire/lipid/search/query.rb +23 -0
- data/lib/mspire/lipid/version.rb +6 -0
- data/lib/mspire/lipid_maps.rb +110 -0
- data/mspire-lipid.gemspec +38 -0
- data/scratch/OBConversion_methods.txt +47 -0
- data/scratch/atom_methods.txt +145 -0
- data/scratch/bond_methods.txt +867 -0
- data/scratch/mol_methods.txt +183 -0
- data/scratch/split_molecules.rb +93 -0
- data/script/find_nearest_lipid.rb +134 -0
- data/spec/mspire/lipid/ion_spec.rb +96 -0
- data/spec/mspire/lipid/modification_spec.rb +70 -0
- data/spec/mspire/lipid/search_spec.rb +82 -0
- data/spec/mspire/lipid_maps_spec.rb +64 -0
- data/spec/mspire/lipid_spec.rb +16 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/testfiles/lipidmaps_download.tsv +11 -0
- data/spec/testfiles/lipidmaps_programmatic_short.tsv +32 -0
- data/spec/testfiles/lipidmaps_sd_download.tsv +11 -0
- metadata +202 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
require 'mspire/mass'
|
|
2
|
+
require 'mspire/molecular_formula'
|
|
3
|
+
|
|
4
|
+
module Mspire
|
|
5
|
+
class Lipid
|
|
6
|
+
|
|
7
|
+
# A named chemical modification (e.g. an adduct such as a proton, or a small
# molecule such as water) described by a formula, a charge and a signed mass
# difference.
#
# The convention is that all mods are gains unless the name ends in an
# underscore.
class Modification < Mspire::MolecularFormula

  # Calculates the mass diff for +formula+. For every positive charge the
  # mass of an electron is subtracted; for every negative charge the mass of
  # an electron is added.
  #
  # @param formula anything accepted by Mspire::MolecularFormula.[] (may be a String)
  # @param [Integer] charge the net charge carried by the modification
  # @param [Boolean] gain if false, the returned mass diff is negated
  # @return [Numeric] the signed mass difference
  def self.massdiff(formula, charge, gain=true)
    diff = Mspire::MolecularFormula[formula].mass
    diff -= (charge * Mspire::Mass::ELECTRON) # + charge subtracts, - charge adds
    gain ? diff : -diff
  end

  # the charge on the mod should be represented by the number of plusses
  # or minuses after the formula (Li+ for a +1 charge Lithium or H2++, 2
  # protons with a total of 2 charges)
  FORMULAS = {
    :proton => 'H',
    :ammonium => 'NH4',
    :lithium => 'Li',
    :sodium => 'Na',
    :water => 'H2O',
    :ammonia => 'NH3',
    :carbon_dioxide => 'CO2',
    :acetate => 'C2H3O2',  # OAc- # need to work out negative charge
  }

  # net charge of each named modification in FORMULAS
  CHARGE = {
    :proton => 1,
    :ammonium => 1,
    :lithium => 1,
    :sodium => 1,
    :water => 0,
    :ammonia => 0,
    :carbon_dioxide => 0,
    :acetate => -1,
  }

  # mass diff (as a gain) of each named modification, determined by running
  # FORMULAS and CHARGE through Modification.massdiff
  MASSDIFFS = {}
  FORMULAS.each do |name, formula|
    MASSDIFFS[name] = self.massdiff(formula, CHARGE[name])
  end

  # as a symbol
  attr_accessor :name
  # a MolecularFormula object
  attr_accessor :formula
  # negative indicates a loss
  attr_accessor :massdiff
  # the charge
  attr_accessor :charge

  # If no mass or formula is given then it searches the common mods
  # (FORMULAS, CHARGE, MASSDIFFS) for the name.
  #
  # @param [Symbol] name the name of the mod
  # @param [Hash] opts values for mods that are not in the FORMULAS,
  #   CHARGE, or MASSDIFFS hashes (or overrides for those that are)
  #
  # attributes:
  #    :formula = the chemical formula, lipidmaps style ("C2H4BrO") or
  #               any valid argument to MolecularFormula.from_any
  #    :massdiff = +/-Float (inferred from the formula if not given)
  #    :charge = +/- Integer
  #
  # instruction:
  #    :loss = true   negates the mass diff sign and charge during initialization
  #                   this option is typically only done for molecules
  #                   already present in the FORMULA hash (e.g.)
  #
  #        proton_loss = Mspire::Lipid::Modification.new(:proton, :loss => true)
  #        water_loss = Mspire::Lipid::Modification.new(:water, :loss => true)
  #
  def initialize(name, opts={})
    @name = name
    form_string = opts[:formula] || FORMULAS[name]
    @formula = form_string && Mspire::MolecularFormula.from_any(form_string)
    @charge = opts[:charge] || CHARGE[name]
    @massdiff = opts[:massdiff] || MASSDIFFS[name]

    # Honor the documented contract: a custom mod that supplies a formula
    # but no massdiff gets its massdiff computed the same way MASSDIFFS is.
    # An unknown charge is treated as neutral for this computation only.
    if @massdiff.nil? && form_string
      @massdiff = self.class.massdiff(form_string, @charge || 0)
    end

    if opts[:loss]
      # necessary if you are using a named molecule and you want its loss
      # rather than gain (i.e., you want a negative massdiff). Values that
      # could not be determined stay nil instead of raising on nil negation.
      @charge = -@charge if @charge
      @massdiff = -@massdiff if @massdiff
    end
  end

  # @return [String] the formula followed by one '+' or '-' per unit of
  #   charge (nothing is appended for a zero or unknown charge)
  def charged_formula_string
    units = @charge.to_i
    sign = (units > 0) ? '+' : '-'
    @formula.to_s + (sign * units.abs)
  end

  alias_method :to_s, :charged_formula_string

  # @return [Boolean] true when the mass diff is positive
  def gain?
    massdiff > 0
  end

  # @return [Boolean] the opposite of gain? (so a zero mass diff counts as a loss)
  def loss?
    !gain?
  end

  def inspect
    "<Mod: #{to_s}>"
  end

end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
require 'mspire/spectrum'
|
|
2
|
+
require 'rserve/simpler' # TODO: move to integrated interface with rserve when available
|
|
3
|
+
require 'core_ext/array/in_groups'
|
|
4
|
+
require 'mspire/lipid/search/hit'
|
|
5
|
+
require 'mspire/lipid/search/bin'
|
|
6
|
+
require 'mspire/lipid/modification'
|
|
7
|
+
require 'mspire/lipid/search/probability_distribution'
|
|
8
|
+
|
|
9
|
+
module Mspire
|
|
10
|
+
class Lipid
|
|
11
|
+
class Search
|
|
12
|
+
# Default modifications for generate_simple_queries: each name maps to the
# list of copy counts to try for that modification.
STANDARD_MODIFICATIONS = {
  proton: [1, 2],
  ammonium: [1],
  lithium: [1],
  water: [1, 2],
}

# Default search options; anything passed to Search.new or #search is
# merged over these.
STANDARD_SEARCH = {
  units: :ppm,
  query_min_count_per_bin: 500,  # min number of peaks per bin
  num_rand_samples_per_bin: 1000,
  num_nearest: 2,
  return_order: :as_given,  # or :sorted
}
|
|
25
|
+
|
|
26
|
+
attr_accessor :options
|
|
27
|
+
attr_accessor :search_function
|
|
28
|
+
|
|
29
|
+
# Generates one Query per (lipid, modification, count) combination and
# returns a new search object built from them. Only one kind of
# modification is applied to a lipid at a time; a count of 2 means the same
# modification is applied twice.
#
# @param lipids the lipids to modify (each is handed to Query.new unchanged)
# @param [Hash] mods modification name => Array of copy counts
# @param [Boolean] gain_and_loss would additionally combine one gain with
#   one loss; this was never implemented
# @raise [NotImplementedError] if gain_and_loss is true
def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
  if gain_and_loss
    # The original branch called undefined methods and could only crash;
    # fail with an explicit message until the combination is implemented.
    raise NotImplementedError, "combining a gain with a loss (gain_and_loss) is not implemented yet"
  end

  # each modification object is built once and shared by every query
  real_mods_and_cnts = mods.map {|name, cnts| [Mspire::Lipid::Modification.new(name), cnts] }

  possible_lipids = []
  lipids.each do |lipid|
    real_mods_and_cnts.each do |mod, counts|
      counts.each do |cnt|
        possible_lipids << Mspire::Lipid::Search::Query.new(lipid, Array.new(cnt, mod))
      end
    end
  end

  self.new(possible_lipids)
end
|
|
54
|
+
|
|
55
|
+
# Builds the database spectrum and the search function for the given ions.
#
# ions are Mspire::Lipid::Ion objects and each one should give a non-nil
# m/z value. opts are merged over STANDARD_SEARCH.
def initialize(ions=[], opts={})
  merged_options = STANDARD_SEARCH.merge(opts)
  @options = merged_options
  @db_isobar_spectrum = create_db_isobar_spectrum(ions)
  @search_function = create_search_function(ions, merged_options)
end
|
|
62
|
+
|
|
63
|
+
# Searches the database for each query and returns an array of HitGroups.
# The top hit of every group is given a BH derived q-value by qvalues!
# (will switch to Storey soon enough).
#
# assumes search_queries are in ascending m/z order
#
# @param search_queries the queries to look up
# @param [Hash] opts per-call overrides of the options given at creation
#   :return_order = :as_given (alias :given) returns the groups in the
#                   order produced by the search function; :sorted returns
#                   them in the order qvalues! ranks them
# @raise [ArgumentError] for any other :return_order
def search(search_queries, opts={})
  opt = @options.merge( opts )
  hit_groups = @search_function.call(search_queries, opt[:num_nearest])
  # always run: this is what assigns the q-values, whatever order is returned
  sorted_hit_groups = qvalues!(hit_groups, opt)
  # Dispatch on the merged options so the configured default applies;
  # :given is still accepted because the original code matched on it.
  case opt[:return_order]
  when :as_given, :given
    hit_groups
  when :sorted
    sorted_hit_groups
  else
    raise ArgumentError, "invalid :return_order"
  end
end
|
|
80
|
+
|
|
81
|
+
# Assigns a q-value to the top hit of every hit group and returns the hit
# groups ordered by ascending p-value (ties broken by absolute delta, then
# absolute ppm, then original position).
#
# from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
#
# @param hit_groups objects responding to pvalue, delta, ppm and first
# @param opts currently unused
# @raise [RuntimeError] if any p-value is NaN
def qvalues!(hit_groups, opts)
  num_total_tests = hit_groups.size

  # calculate Q-values BH style for now:
  # the index keeps every tuple distinct, so sorting never has to compare
  # the hit groups themselves
  pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

  if pval_hg_index_tuples.any? {|tuple| tuple.first.nan? }
    $stderr.puts "pvalue of NaN!"
    $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
    # carry the diagnostic in the exception too, instead of a bare raise
    raise "pvalue of NaN! Consider increasing query_min_count_per_bin or setting ppm to false"
  end

  prev_bh_value = 0
  pval_hg_index_tuples.sort.each_with_index.map do |tuple, rank|
    hit_group = tuple.last
    # Sometimes this correction can give values greater than 1,
    # so we set those values at 1
    bh_value = [tuple.first * num_total_tests / (rank + 1), 1].min
    # To preserve monotonicity in the values, we take the maximum of the
    # previous value or this one, so that we don't yield a value less
    # than the previous.
    # NOTE(review): textbook Benjamini-Hochberg uses a running minimum
    # taken from the largest p-value down; confirm the running maximum is
    # intended.
    prev_bh_value = [bh_value, prev_bh_value].max
    hit_group.first.qvalue = prev_bh_value # give the top hit the q-value
    hit_group
  end
end
|
|
119
|
+
|
|
120
|
+
# Prepares everything needed to search +ions+ and returns the search
# function itself.
#
# @param ions the database ions (see create_db_isobar_spectrum)
# @param [Hash] opt search options; reads :query_min_count_per_bin,
#   :num_rand_samples_per_bin, :num_nearest and :ppm / :units
# @return [Proc] lambda taking (search_queries, num_nearest_hits) and
#   returning an array of hit groups
def create_search_function(ions, opt)

  db_isobar_spectrum = create_db_isobar_spectrum(ions)

  search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])

  # An explicit :ppm option (true/false) still wins, as before. When it is
  # absent the documented :units option decides; previously a missing :ppm
  # was passed along as nil, silently disabling ppm despite :units => :ppm.
  use_ppm = opt.key?(:ppm) ? opt[:ppm] : (opt[:units] == :ppm)

  create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], use_ppm)

  # create the actual search function
  # returns an array of hit_groups
  lambda do |search_queries, num_nearest_hits|
    # presumably files each query into the bin covering its m/z
    # (Bin.bin is defined outside this file)
    Bin.bin(search_bins, search_queries, &:mz)
    # honor the per-call count, falling back to the configured one
    num_hits = num_nearest_hits || opt[:num_nearest]
    search_bins.reject {|bin| bin.data.empty? }.flat_map {|bin| bin.queries_to_hit_groups!(num_hits) }
  end
end
|
|
136
|
+
|
|
137
|
+
#####################################################
|
|
138
|
+
# Ancillary to create_search_function:
|
|
139
|
+
#####################################################
|
|
140
|
+
|
|
141
|
+
# Groups the ions by identical m/z and returns a spectrum whose m/z values
# are the distinct m/z values in ascending order and whose "intensities"
# are, for each m/z, the array of ions sharing it.
def create_db_isobar_spectrum(ions)
  ions_by_mz = ions.group_by(&:mz).sort_by {|mz, _isobars| mz }
  distinct_mzs = ions_by_mz.map(&:first)
  isobar_groups = ions_by_mz.map(&:last)
  Mspire::Spectrum.new([distinct_mzs, isobar_groups])
end
|
|
150
|
+
|
|
151
|
+
# Gives every search bin a probability distribution built from the
# distances between random m/z values drawn inside the bin's range and
# their nearest database m/z.
#
# use_ppm expresses the distances in ppm (relative to the nearest database
# m/z); amu is used if it is false.
# returns the search_bins
def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
  search_bins.each do |search_bin|
    generator = Random.new
    sampled_mzs = Array.new(num_rand_samples_per_bin) { generator.rand(search_bin.to_range) }

    deviations = sampled_mzs.map do |sampled_mz|
      nearest_db_mz = db_isobar_spectrum.find_nearest(sampled_mz)
      distance = (sampled_mz - nearest_db_mz).abs
      if use_ppm
        (distance / nearest_db_mz) * 1e6
      else
        distance
      end
    end

    deviation_type = use_ppm ? :ppm : :amu
    search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution(deviation_type, deviations)
  end
  search_bins
end
|
|
167
|
+
|
|
168
|
+
# Splits the database spectrum into consecutive m/z bins, using as many
# bins as possible while keeping at least min_n_per_bin points per bin
# (always at least one bin).
#
# Every bin but the last runs from the first m/z of its group up to, but
# excluding, the first m/z of the next group; the last bin runs from the
# first to the last m/z of its group, inclusive.
#
# @param db_isobar_spectrum spectrum responding to mzs and points
# @param [Numeric] min_n_per_bin minimum number of points wanted per bin
# @return [Array<Mspire::Lipid::Search::Bin>]
# @raise [RuntimeError] if the spectrum has no points
def create_search_bins(db_isobar_spectrum, min_n_per_bin)
  num_points = db_isobar_spectrum.mzs.size
  # Fail early with the intended message: depending on how in_groups treats
  # an empty array, the zero-group branch below may never be reached and
  # the code would die on nil instead.
  raise 'I think you need some data in your query spectrum!' if num_points == 0

  # make sure we get the right bin size based on the input: the largest
  # number of divisions that still satisfies min_n_per_bin
  optimal_num_groups = (1..num_points).take_while {|divisions| (num_points.to_f / divisions) >= min_n_per_bin }.last || 1

  groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a

  case groups.size
  when 0
    raise 'I think you need some data in your query spectrum!'
  when 1
    group = groups.first
    [ Mspire::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), db_isobar_spectrum ) ]
  else
    search_bins = groups.each_cons(2).map do |points1, points2|
      # end-exclusive so a boundary m/z belongs to exactly one bin
      Mspire::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), db_isobar_spectrum )
    end
    last_group = groups.last
    search_bins << Mspire::Lipid::Search::Bin.new( Range.new(last_group.first.first, last_group.last.first), db_isobar_spectrum ) # inclusive
  end
end
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require 'mspire/bin'
|
|
2
|
+
|
|
3
|
+
module Mspire
|
|
4
|
+
class Lipid
|
|
5
|
+
class Search
|
|
6
|
+
|
|
7
|
+
# A Search::Bin is a range that contains the *entire* query spectrum
|
|
8
|
+
# (not just the portion covered by the range). the query spectrum, and
|
|
9
|
+
# a ProbabilityDistribution -- the probability that a peak's delta to
|
|
10
|
+
# nearest peak is that small by chance.
|
|
11
|
+
class Bin < Mspire::Bin
|
|
12
|
+
# the intensity value of the query spectrum should be a query
|
|
13
|
+
attr_accessor :db_spectrum
|
|
14
|
+
attr_accessor :probability_distribution
|
|
15
|
+
|
|
16
|
+
# Creates a bin covering range_obj (same begin, end and end-exclusivity)
# that searches against db_spectrum.
def initialize(range_obj, db_spectrum)
  lower, upper, end_excluded = range_obj.begin, range_obj.end, range_obj.exclude_end?
  super(lower, upper, end_excluded)
  @db_spectrum = db_spectrum
end
|
|
20
|
+
|
|
21
|
+
# Queues a query in this bin's data; returns the data array.
def <<(query)
  @data.push(query)
end
|
|
24
|
+
|
|
25
|
+
# Searches every queued query and empties the data queue. Returns one hit
# group per query (see best_hits); the top hit of each group is given its
# p-value from this bin's probability distribution.
def queries_to_hit_groups!(num_hits=1)
  pending_queries = @data.dup
  @data.clear

  # best_hits looks the isobar groups up by database index
  @db_isobar_groups_by_index = @db_spectrum.intensities

  hit_groups = pending_queries.map {|query| best_hits(query, num_hits) }

  # only the top hit of each group receives a p-value
  top_hits = hit_groups.map(&:first)
  top_pvalues = probability_distribution.pvalues( top_hits )
  top_hits.zip(top_pvalues).each do |top_hit, pvalue|
    top_hit.pvalue = pvalue
  end

  hit_groups
end
|
|
46
|
+
|
|
47
|
+
# Finds the database entries nearest to the query's m/z.
#
# Looks at the (num_hits - 1) database entries on either side of the
# nearest one, sorts them by absolute m/z distance (a tie goes to the lower
# index) and keeps the closest num_hits.
#
# @param query object responding to mz
# @param [Integer] num_hits maximum number of hits to return
# @return [HitGroup] the hits, closest first
def best_hits(query, num_hits)
  query_mz = query.mz
  db_mzs = @db_spectrum.mzs
  nearest_index = @db_spectrum.find_nearest_index(query_mz)

  # Clamp the window to the valid index range. The upper clamp used to
  # subtract from the spectrum object itself and raised whenever the window
  # ran past the last database entry.
  window_min = [nearest_index - (num_hits - 1), 0].max
  window_max = [nearest_index + (num_hits - 1), db_mzs.size - 1].min

  delta_index_pairs = (window_min..window_max).map {|i| [(query_mz - db_mzs[i]).abs, i] }
  closest_indices = delta_index_pairs.sort.first(num_hits).map(&:last)

  hits = closest_indices.map do |db_index|
    Hit.new( :db_isobar_group => @db_isobar_groups_by_index[db_index], :observed_mz => query_mz)
  end
  HitGroup.new(hits)
end
|
|
66
|
+
|
|
67
|
+
# The superclass's inspect string followed by the number of database points
# and the probability distribution.
def inspect
  base = super
  num_db_points = db_spectrum.mzs.size
  "<(#{base}) @db_spectrum(points size)=#{num_db_points} @probability_distribution=#{probability_distribution}>"
end
|
|
70
|
+
|
|
71
|
+
# A plain Range with this bin's begin, end and end-exclusivity.
def to_range
  lower = self.begin
  upper = self.end
  Range.new(lower, upper, exclude_end?)
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
module Mspire
|
|
3
|
+
class Lipid
|
|
4
|
+
class Search
|
|
5
|
+
# An Array of Lipid::Ion objects that all have the same (or possibly
# similar) m/z.
class DBIsobarGroup < Array
  # The m/z of the group as a whole. It is kept separately so that the
  # group *could* hold members with slightly different m/z values and still
  # be used as a container. In the current implementation they have exactly
  # the same m/z.
  attr_accessor :mz

  # ar is the initial contents of the group; mz is only stored when it is
  # truthy.
  def initialize( ar=[], mz=nil)
    replace(ar)
    @mz = mz if mz
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
|
|
2
|
+
module Mspire
|
|
3
|
+
class Lipid
|
|
4
|
+
class Search
|
|
5
|
+
# One database match for an observed m/z.
class Hit
  # db_isobar_group: the db_isobar_group this hit is associated with. Each
  #   hit is only associated with a single db_isobar_group!
  # observed_mz: the experimental m/z value
  # pvalue: the probability the hit is due to random chance
  # qvalue: the FDR if the threshold accepts this pvalue. Note that this
  #   value is relative to the number of tests performed and not completely
  #   intrinsic to the hit itself.
  # decoy_qvalue: qvalue derived from decoy testing
  # probability_distribution: the probability distribution that can be used
  #   to determine its pvalue
  attr_accessor :db_isobar_group, :observed_mz, :pvalue, :qvalue,
                :decoy_qvalue, :probability_distribution

  # Sets one instance variable per key of the hash, e.g.
  # Hit.new(:observed_mz => 760.58) sets @observed_mz.
  def initialize(hash={})
    hash.each_pair do |attribute, value|
      instance_variable_set("@#{attribute}", value)
    end
  end

  # observed m/z minus the m/z of the first member of the isobar group
  def delta
    database_mz = @db_isobar_group.first.mz
    @observed_mz - database_mz.to_f
  end

  alias_method :amu, :delta

  # the absolute value of distance from true val
  def delta_abs
    delta.abs
  end

  # parts per million (divided by theoretical m/z)
  def ppm
    fraction = delta / @db_isobar_group.first.mz
    fraction * 1e6
  end

  # the m/z of the first member of the isobar group
  def theoretical_mz
    @db_isobar_group.first.mz
  end

  def inspect
    "<<#{super} -- <ppm=#{ppm} delta=#{delta} theoretical_mz=#{theoretical_mz}>>"
  end
end
|
|
54
|
+
|
|
55
|
+
# A query that matched multiple items. Each search returns a hit group
# which consists of the best hits for that experimental m/z. When
# queried for values like delta or ppm, it will delegate to the first hit.
# So, in many ways it can be used as a container for hits, but it puts
# its best face forward.
class HitGroup < Array

  # should implement with delegator obviously...
  # should allow setting ???

  def delta() first.delta end
  def ppm() first.ppm end
  def theoretical_mz() first.theoretical_mz end
  def observed_mz() first.observed_mz end
  def pvalue() first.pvalue end
  def qvalue() first.qvalue end
  def decoy_qvalue() first.decoy_qvalue end

  # the isobar group of the best hit
  def db_isobar_group() first.db_isobar_group end

  # Hit exposes its group as db_isobar_group and has no query_group method,
  # so plain delegation always raised NoMethodError. Members that do
  # respond to query_group are still asked directly.
  def query_group
    best = first
    best.respond_to?(:query_group) ? best.query_group : best.db_isobar_group
  end

  # the first (best) hit of the group
  def best_hit() first end

end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|