mspire-lipid 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 93cd81eb62ea08585b83abf723cc245b5310b5a6
4
+ data.tar.gz: 733b21483959d3df18dd60477ef132b6f474169a
5
+ SHA512:
6
+ metadata.gz: 4947e43e16462b8cf9371ea2d37472d6409bab88e9c26416d1483ab4c152b8ad1da3f5f2f1618be4fe26d9076b783196388ad47448e086caaac439bfb85ab025
7
+ data.tar.gz: 16c2104cbe2813a6c3e7cc18e773396853853cbde218e7a8d18c830e1c7af96d47e6e6f1bd0b2c7bb6621bd8d8d4dc0b0ca28525e7fb11deecef81fa23c8ca35
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,53 @@
1
+ # rcov generated
2
+ coverage
3
+
4
+ # rdoc generated
5
+ rdoc
6
+
7
+ # yard generated
8
+ doc
9
+ .yardoc
10
+
11
+ # bundler
12
+ .bundle
13
+
14
+ # jeweler generated
15
+ pkg
16
+
17
+ # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
18
+ #
19
+ # * Create a file at ~/.gitignore
20
+ # * Include files you want ignored
21
+ # * Run: git config --global core.excludesfile ~/.gitignore
22
+ #
23
+ # After doing this, these files will be ignored in all your git projects,
24
+ # saving you from having to 'pollute' every project you touch with them
25
+ #
26
+ # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line.)
27
+ #
28
+ # For MacOS:
29
+ #
30
+ #.DS_Store
31
+
32
+ # For TextMate
33
+ #*.tmproj
34
+ #tmtags
35
+
36
+ # For emacs:
37
+ #*~
38
+ #\#*
39
+ #.\#*
40
+
41
+ # For vim:
42
+ .*.swp
43
+
44
+ # For redcar:
45
+ #.redcar
46
+
47
+ # For rubinius:
48
+ #*.rbc
49
+
50
+ .RData
51
+ .Rhistory
52
+
53
+ *.dataset
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in mspire-lipid.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2012 Brigham Young University
2
+ authored by: John T. Prince
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,11 @@
1
+ # Mspire::Lipid
2
+
3
+ Identify and quantify (shotgun) lipidomics samples.
4
+
5
+ ## Installation
6
+
7
+ gem 'mspire-lipid'
8
+
9
+ ## Usage
10
+
11
+ TBD
@@ -0,0 +1,24 @@
1
# Rake tasks for the gem: bundler's gem tasks, the RSpec suite (default task)
# and RDoc generation.
require "bundler/gem_tasks"

@gem_name = 'mspire-lipid'
@gem_path_name = @gem_name.gsub('-','/')

# Make sure lib/ is on the load path so the version file can be required even
# when nothing else has set it up.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# The version file must be loaded BEFORE Mspire::Lipid is referenced;
# referencing the constant first only works if something else (e.g. the
# gemspec) happened to define it already.
require "#{@gem_path_name}/version"

@module_name = Mspire::Lipid

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # VERSION is looked up on the gem's top-level constant at task-definition time
  version = @module_name.const_get('VERSION')
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "#{@gem_name} #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ puts "under development"
4
+
5
+ =begin
6
+ require 'trollop'
7
+ require 'ms/mzml'
8
+ require 'ms/lipid/search'
9
+ require 'ms/lipid/ion'
10
+ require 'ms/lipid/search/query'
11
+ require 'ms/lipid_maps'
12
+ require 'ms/error_rate/qvalue'
13
+
14
+ # for html output: (just make the id clickable)
15
+ LIPIDMAPS_SEARCH = "http://www.lipidmaps.org/data/LMSDRecord.php?LMID="
16
+
17
+ DECOY_MODULATOR = 0.8319
18
+
19
+ DEFAULTS = {
20
+ :bin_width => 5,
21
+ :bin_unit => :ppm,
22
+ :search_unit => :ppm,
23
+ }
24
+
25
+ class LipidPoint < Array
26
+ attr_accessor :sample
27
+ end
28
+
29
+ class Sample
30
+ attr_accessor :file
31
+ attr_accessor :spectrum
32
+ def initialize(file, merge_opts={})
33
+ @file = file
34
+ @spectrum = merge_ms1_spectra(file, DEFAULTS.merge(merge_opts))
35
+ end
36
+
37
+ # returns a single spectrum object
38
+ def self.merge_ms1_spectra(files, opts)
39
+ files.map do |file|
40
+ MS::Mzml.foreach(file).select {|spec| spec.ms_level == 1 }.map(&:sort!)
41
+ end
42
+ MS::Spectrum.merge(spectra, opts)
43
+ end
44
+ end
45
+
46
+ ext = ".lipidID.tsv"
47
+
48
+ parser = Trollop::Parser.new do
49
+ banner "usage: #{File.basename(__FILE__)} [OPTIONS] <lipidmaps>.tsv <file>.mzML ..."
50
+ text "output: <file>#{ext} ..."
51
+ text ""
52
+ text "note that sometimes you get an error from R like this:"
53
+ text "(`eval': voidEval failed: Packet[cmd=2130771970,len=<nil>, con='<nil>', status=error...)"
54
+ text "just re-run it and it will work"
55
+ text ""
56
+ opt :bin_width, "width of the bins for merging", :default => DEFAULTS[:bin_width]
57
+ opt :bin_unit, "units for binning (ppm or amu)", :default => DEFAULTS[:bin_unit].to_s
58
+ opt :search_unit, "unit for searching nearest hit (ppm or amu)", :default => DEFAULTS[:search_unit].to_s
59
+ opt :top_n_peaks, "the number of highest intensity peaks to query the DB with", :default => 1000
60
+ opt :display_n, "the number of best hits to display", :default => 20
61
+ text ""
62
+ text "modifications (at least 1 charged mod is required):"
63
+ opt :lithium, "search for lithium adducts"
64
+ opt :ammonium, "search for ammonium adducts"
65
+ opt :proton_gain, "search for proton gain"
66
+ opt :proton_loss, "search for proton loss"
67
+ opt :water_loss, "*all* mods are also considered with water loss"
68
+ opt :decoy, "search with an equal number of decoy modifications"
69
+ opt :verbose, "talk about it"
70
+ end
71
+
72
+ opts = parser.parse(ARGV)
73
+ opts[:bin_unit] = opts[:bin_unit].to_sym
74
+ opts[:search_unit] = opts[:search_unit].to_sym
75
+
76
+ if ARGV.size < 2
77
+ parser.educate
78
+ exit
79
+ end
80
+
81
+ CHARGED_MODS = [:lithium, :ammonium, :proton_gain, :proton_loss]
82
+
83
+ unless CHARGED_MODS.any? {|key| opts[key] }
84
+ puts "*" * 78
85
+ puts "ArgumentError: need at least one charged mod!"
86
+ puts "*" * 78
87
+ parser.educate
88
+ exit
89
+ end
90
+
91
+ (lipidmaps, *files) = ARGV
92
+
93
+ $VERBOSE = opts[:verbose]
94
+
95
+ MSLM = MS::Lipid::Modification
96
+
97
+ mods = {
98
+ proton_gain: MSLM.new(:proton),
99
+ water_loss: MSLM.new(:water, :loss => true),
100
+ lithium: MSLM.new(:lithium),
101
+ ammonium: MSLM.new(:ammonium),
102
+ proton_loss: MS::Lipid::Modification.new(:proton, :loss => true, :charge => -1)
103
+ }
104
+
105
+ lipids = MS::LipidMaps.parse_file(lipidmaps)
106
+
107
+
108
+ ions = []
109
+ lipids.each do |lipid|
110
+ CHARGED_MODS.each do |key|
111
+ if opts[key]
112
+ ions << MS::Lipid::Ion.new(lipid, [mods[key]])
113
+ if opts[:water_loss]
114
+ ions << MS::Lipid::Ion.new(lipid, [mods[key], mods[:water_loss]])
115
+ end
116
+ end
117
+ end
118
+ end
119
+
120
+
121
+ searcher = MS::Lipid::Search.new(ions, :ppm => (opts[:search_unit] == :ppm))
122
+
123
+ if opts[:decoy]
124
+ # assumes a mod group that is either the mod or a mod and water loss
125
+ decoy_ions = ions.map do |ion|
126
+ # modify the first mod and leave the second untouched (if any)
127
+ mod_group = ion.modifications
128
+ fake_mod = mod_group.first.dup
129
+ fake_mod.massdiff *= DECOY_MODULATOR
130
+ fake_mod.formula = "FAKE#{mod_group.first.formula}(#{fake_mod.massdiff})"
131
+ fake_mod.name = "fake_#{mod_group.first.name}".to_sym
132
+ new_mod_group = [fake_mod, *mod_group[1..-1]]
133
+ MS::Lipid::Ion.new(ion.lipid, new_mod_group)
134
+ end
135
+ decoy_searcher = MS::Lipid::Search.new(decoy_ions, :ppm => (opts[:search_unit] == :ppm))
136
+ end
137
+
138
+ files.each do |file|
139
+ base = file.chomp(File.extname(file))
140
+ puts "processing file: #{file}" if $VERBOSE
141
+ sample = Sample.new(file, opts)
142
+
143
+ num_points = sample.spectrum.mzs.size
144
+ puts "#{num_points} merged peaks in #{file}" if $VERBOSE
145
+
146
+ highest_points = sample.spectrum.points.sort_by(&:last).reverse[0,opts[:top_n_peaks]].sort
147
+
148
+ sample.spectrum = MS::Spectrum.from_points( highest_points )
149
+
150
+ queries = sample.spectrum.mzs.each_with_index.map {|mz,index| MS::Lipid::Search::Query.new(mz, index) }
151
+ hit_groups = searcher.search(queries, :return_order => :sorted)
152
+ if opts[:decoy]
153
+ decoy_hit_groups = decoy_searcher.search(queries, :return_order => :sorted)
154
+ hit_group_qvalue_pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(hit_groups, decoy_hit_groups, :monotonic => true, &:pvalue)
155
+ hit_group_qvalue_pairs.each do |hit_group, qval|
156
+ hit_group.first.decoy_qvalue = qval
157
+ end
158
+ end
159
+
160
+ # all info is relative to the hit_group
161
+ info = {
162
+ decoy_qvalue: :decoy_qvalue.to_proc,
163
+ qvalue: :qvalue.to_proc,
164
+ pvalue: :pvalue.to_proc,
165
+ observed_mz: :observed_mz.to_proc,
166
+ theoretical_mz: :theoretical_mz.to_proc,
167
+ delta: :delta.to_proc,
168
+ ppm: :ppm.to_proc,
169
+ hit2_ppm: proc {|hg| hg[1].ppm },
170
+ first_isobar_name: proc {|hg| (lipid=hg.first.db_isobar_group.first.lipid).common_name || lipid.systematic_name },
171
+ num_isobars: proc {|hg| hg.first.db_isobar_group.size },
172
+ ions: proc {|hg|
173
+ hg.first.db_isobar_group.map do |ion|
174
+ [ion.lipid.lm_id, ion.modifications.map do |mod|
175
+ (mod.gain? ? '+' : '-') + "(#{mod.charged_formula})"
176
+ end.join
177
+ ].join(":")
178
+ end.join(' ')
179
+ }
180
+ }
181
+
182
+ output = base + ext
183
+ puts "writing to #{output}" if $VERBOSE
184
+ File.open(output, 'w') do |out|
185
+ out.puts info.keys.join("\t")
186
+ hit_groups[0,opts[:display_n]].each do |hit_group|
187
+ out.puts info.values.map {|prc| prc.call(hit_group) }.join("\t")
188
+ end
189
+ end
190
+
191
+ if opts[:decoy]
192
+ decoy_output = base + '.decoy' + ext
193
+ File.open(decoy_output, 'w') do |dout|
194
+ decoy_info = info.dup
195
+ [:qvalue, :decoy_qvalue].each {|key| decoy_info.delete(key) }
196
+ dout.puts decoy_info.keys.join("\t")
197
+ decoy_hit_groups[0,opts[:display_n]].each do |hit_group|
198
+ dout.puts decoy_info.values.map {|prc| prc.call(hit_group) }.join("\t")
199
+ end
200
+ end
201
+ end
202
+ end
203
+ =end
@@ -0,0 +1,19 @@
1
+
2
module Mspire
  # A single lipid record. Attributes are assigned positionally, in the order
  # given by Lipid.members.
  class Lipid
    # Ordered list of attribute names. Each one gets an accessor, and the
    # positional arguments of #initialize are assigned in this order.
    def self.members
      [:lm_id,:common_name,:systematic_name,:formula,:mass,:category,:main_class,:sub_class,:pubchem_id,:inchi_key,:kegg_id,:chebi_id,:structure]
    end

    members.each {|mem| attr_accessor mem }

    # Takes up to 13 positional values (see Lipid.members). Missing values
    # are left nil; extra values are ignored.
    #
    # Assignment is driven by the members list so that every accessor is
    # backed by the instance variable of the same name (previously the ninth
    # value was stored in @pubchem_sid, so #pubchem_id always returned nil).
    def initialize(*args)
      Lipid.members.zip(args) do |name, value|
        instance_variable_set("@#{name}", value)
      end
    end

    # Short summary: id, formula, mass and the common name cut to 20
    # characters (followed by "..." when it was longer). A nil common name is
    # shown as an empty string instead of raising.
    def inspect
      name = common_name.to_s
      cut_common_name = (name.size <= 20) ? name : (name[0,20] + "...")
      "<#{lm_id}: #{formula}: #{mass} #{cut_common_name}>"
    end
  end
end
@@ -0,0 +1,71 @@
1
+ require 'mspire/lipid/ion/fragment'
2
+ require 'mspire/molecular_formula'
3
+
4
module Mspire
  class Lipid
    # a lipid with modifications (typically the mods give it a charge so that
    # it can be seen in the mass spec)
    class Ion
      # the lipid this ion is built from (must respond to #mass and #formula)
      attr_accessor :lipid
      # the collection of modifications applied to the lipid; each one is
      # asked for #charge, #massdiff, #gain? and #formula
      attr_accessor :modifications

      # lipid - the base lipid
      # mods  - the modifications applied to it (defaults to none)
      def initialize(lipid, mods=[])
        @lipid = lipid
        @modifications = mods
        @mz = nil
      end

      # net charge: the sum of the charges of all modifications (0 if none)
      def charge
        @modifications.inject(0) {|z, mod| z + mod.charge }
      end

      # a MolecularFormula object: the lipid's formula with each
      # modification's formula added (gain) or subtracted (loss)
      def formula
        base = @lipid.formula
        base = Mspire::MolecularFormula.from_any(base) unless base.is_a?(Mspire::MolecularFormula)
        modifications.inject(base) do |total, mod|
          mod.gain? ? (total + mod.formula) : (total - mod.formula)
        end
      end

      # the signed m/z value: (lipid mass + all mass diffs) / net charge.
      # Returns nil when the net charge is zero.  A non-nil value is cached.
      def mz_signed
        return @mz if @mz
        mass = @lipid.mass
        net_charge = 0
        @modifications.each do |mod|
          mass += mod.massdiff
          net_charge += mod.charge
        end
        @mz = (net_charge == 0) ? nil : (mass / net_charge)
      end

      # the unsigned m/z value (the key attribute of a query), or nil when
      # the ion carries no net charge (previously this raised NoMethodError,
      # which also broke #inspect for an ion built with the default mods)
      def mz
        signed = mz_signed
        signed && signed.abs
      end

      def inspect
        "<|| Ion mz=#{mz} #{lipid.inspect} + #{modifications.map(&:inspect).join(', ')} ||>"
      end

    end
  end
end
@@ -0,0 +1,68 @@
1
+
2
module Mspire
  class Lipid

    # Maps a carbon-chain name stem to its number of carbons.
    # Covers 1 to 99: 1..29 are listed explicitly, 30..99 are generated below.
    CHAIN_PREFIXES = {
      'meth' => 1,
      'eth' => 2,
      'prop' => 3,
      'but' => 4,
      'pent' => 5,
      'hex' => 6,
      'hept' => 7,
      'oct' => 8,
      'non' => 9,
      'dec' => 10,
      'undec' => 11,
      'dodec' => 12,
      'tridec' => 13,
      'tetradec' => 14,
      'pentadec' => 15,
      'hexadec' => 16,
      'heptadec' => 17,
      'octadec' => 18,
      'nonadec' => 19,
      'eicos' => 20,
      'heneicos' => 21,
      'docos' => 22,
      'tricos' => 23,
      'tetracos' => 24,
      'pentacos' => 25,
      'hexacos' => 26,
      'heptacos' => 27,
      'octacos' => 28,
      'nonacos' => 29
    }

    # prefix contributed by the ones digit (nothing for a round multiple of ten)
    ones_prefixes = {
      0 => '',
      1 => 'hen',
      2 => 'do',
      3 => 'tri',
      4 => 'tetra',
      5 => 'penta',
      6 => 'hexa',
      7 => 'hepta',
      8 => 'octa',
      9 => 'nona',
    }

    # prefix contributed by the tens digit.  Note the irregular 'tria' for 30
    # (triacont-, hentriacont-, ...); the other decades reuse the regular
    # multiplier (tetracont-, pentacont-, ...).
    tens_prefixes = {
      3 => 'tria',
      4 => 'tetra',
      5 => 'penta',
      6 => 'hexa',
      7 => 'hepta',
      8 => 'octa',
      9 => 'nona',
    }

    # fill in 30..99, e.g. 'dotriacont' => 32, 'pentacont' => 50
    tens_prefixes.each do |tens_place, tens_prefix|
      ones_prefixes.each do |ones_place, ones_prefix|
        CHAIN_PREFIXES["#{ones_prefix}#{tens_prefix}cont"] = 10*tens_place + ones_place
      end
    end

    class Ion
      module Fragment
        # predicts the MS/MS fragments for this ion
        # (not implemented yet: the method body is empty and returns nil)
        def predict_fragment_mzs
        end
      end

      include Fragment
    end
  end
end