RubyGems - mspire-lipid - Versions diffs - 0.2.0 - Mend

mspire-lipid 0.2.0

Files changed (38) hide show

checksums.yaml +7 -0
data/.document +5 -0
data/.gitignore +53 -0
data/.rspec +1 -0
data/Gemfile +4 -0
data/LICENSE +21 -0
data/README.md +11 -0
data/Rakefile +24 -0
data/bin/lipidomic-search.rb +203 -0
data/lib/mspire/lipid.rb +19 -0
data/lib/mspire/lipid/ion.rb +71 -0
data/lib/mspire/lipid/ion/fragment.rb +68 -0
data/lib/mspire/lipid/modification.rb +120 -0
data/lib/mspire/lipid/search.rb +205 -0
data/lib/mspire/lipid/search/bin.rb +79 -0
data/lib/mspire/lipid/search/db_isobar_group.rb +20 -0
data/lib/mspire/lipid/search/hit.rb +79 -0
data/lib/mspire/lipid/search/probability_distribution.rb +50 -0
data/lib/mspire/lipid/search/query.rb +23 -0
data/lib/mspire/lipid/version.rb +6 -0
data/lib/mspire/lipid_maps.rb +110 -0
data/mspire-lipid.gemspec +38 -0
data/scratch/OBConversion_methods.txt +47 -0
data/scratch/atom_methods.txt +145 -0
data/scratch/bond_methods.txt +867 -0
data/scratch/mol_methods.txt +183 -0
data/scratch/split_molecules.rb +93 -0
data/script/find_nearest_lipid.rb +134 -0
data/spec/mspire/lipid/ion_spec.rb +96 -0
data/spec/mspire/lipid/modification_spec.rb +70 -0
data/spec/mspire/lipid/search_spec.rb +82 -0
data/spec/mspire/lipid_maps_spec.rb +64 -0
data/spec/mspire/lipid_spec.rb +16 -0
data/spec/spec_helper.rb +13 -0
data/spec/testfiles/lipidmaps_download.tsv +11 -0
data/spec/testfiles/lipidmaps_programmatic_short.tsv +32 -0
data/spec/testfiles/lipidmaps_sd_download.tsv +11 -0
metadata +202 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 93cd81eb62ea08585b83abf723cc245b5310b5a6
+  data.tar.gz: 733b21483959d3df18dd60477ef132b6f474169a
+SHA512:
+  metadata.gz: 4947e43e16462b8cf9371ea2d37472d6409bab88e9c26416d1483ab4c152b8ad1da3f5f2f1618be4fe26d9076b783196388ad47448e086caaac439bfb85ab025
+  data.tar.gz: 16c2104cbe2813a6c3e7cc18e773396853853cbde218e7a8d18c830e1c7af96d47e6e6f1bd0b2c7bb6621bd8d8d4dc0b0ca28525e7fb11deecef81fa23c8ca35

data/.document ADDED

@@ -0,0 +1,5 @@
+lib/**/*.rb
+bin/*
+-
+features/**/*.feature
+LICENSE.txt

data/.gitignore ADDED

@@ -0,0 +1,53 @@
+# rcov generated
+coverage
+# rdoc generated
+rdoc
+# yard generated
+doc
+.yardoc
+# bundler
+.bundle
+# jeweler generated
+pkg
+# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
+#
+# * Create a file at ~/.gitignore
+# * Include files you want ignored
+# * Run: git config --global core.excludesfile ~/.gitignore
+#
+# After doing this, these files will be ignored in all your git projects,
+# saving you from having to 'pollute' every project you touch with them
+#
+# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # from the line.)
+#
+# For MacOS:
+#
+#.DS_Store
+# For TextMate
+#*.tmproj
+#tmtags
+# For emacs:
+#*~
+#\#*
+#.\#*
+# For vim:
+.*.swp
+# For redcar:
+#.redcar
+# For rubinius:
+#*.rbc
+.RData
+.Rhistory
+*.dataset

data/.rspec ADDED

	@@ -0,0 +1 @@
1	+ --color

data/Gemfile ADDED

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in mspire-lipid.gemspec
+gemspec

data/LICENSE ADDED

@@ -0,0 +1,21 @@
+Copyright (c) 2012 Brigham Young University
+authored by: John T. Prince
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,11 @@
+# Mspire::Lipid
+Identify and quantify (shotgun) lipidomics samples.
+## Installation
+    gem 'mspire-lipid'
+## Usage
+TBD

data/Rakefile ADDED

@@ -0,0 +1,24 @@
# Rake tasks for the mspire-lipid gem: bundler's gem tasks, an rspec `spec`
# task (also the default task) and an rdoc task.
require "bundler/gem_tasks"

@gem_name = 'mspire-lipid'
@gem_path_name = @gem_name.gsub('-','/')
require "#{@gem_path_name}/version"

# Resolve the constant only after the version file has been required, so this
# does not depend on some earlier require having defined it already.
# NOTE(review): assumes the version file defines Mspire::Lipid::VERSION — confirm.
@module_name = Mspire::Lipid

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

task :default => :spec

require 'rdoc/task'
# RDoc::Task is the class defined by 'rdoc/task'.
RDoc::Task.new do |rdoc|
  version = @module_name.const_get('VERSION')

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "#{@gem_name} #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

data/bin/lipidomic-search.rb ADDED

@@ -0,0 +1,203 @@
+#!/usr/bin/env ruby
+puts "under development"
+=begin
+require 'trollop'
+require 'ms/mzml'
+require 'ms/lipid/search'
+require 'ms/lipid/ion'
+require 'ms/lipid/search/query'
+require 'ms/lipid_maps'
+require 'ms/error_rate/qvalue'
+# for html output: (just make the id clickable)
+LIPIDMAPS_SEARCH = "http://www.lipidmaps.org/data/LMSDRecord.php?LMID="
+DECOY_MODULATOR = 0.8319
+DEFAULTS = {
+  :bin_width => 5,
+  :bin_unit => :ppm,
+  :search_unit => :ppm,
+}
+def LipidPoint < Array
+  attr_accessor :sample
+end
+class Sample
+  attr_accessor :file
+  attr_accessor :spectrum
+  def initialize(file, merge_opts={})
+    @file = file
+    @spectrum = merge_ms1_spectra(file, DEFAULTS.merge(merge_opts))
+  end
+  # returns a single spectrum object
+  def self.merge_ms1_spectra(files, opts)
+    files.map do |file|
+      MS::Mzml.foreach(file).select {|spec| spec.ms_level == 1 }.map(&:sort!)
+    end
+    MS::Spectrum.merge(spectra, opts)
+  end
+end
+ext = ".lipidID.tsv"
+parser = Trollop::Parser.new do
+  banner "usage: #{File.basename(__FILE__)} [OPTIONS] <lipidmaps>.tsv <file>.mzML ..."
+  text "output: <file>#{ext} ..."
+  text ""
+  text "note that sometimes you get an error from R like this:"
+  text "(`eval': voidEval failed: Packet[cmd=2130771970,len=<nil>, con='<nil>', status=error...)"
+  text "just re-run it and it will work"
+  text ""
+  opt :bin_width, "width of the bins for merging", :default => DEFAULTS[:bin_width]
+  opt :bin_unit, "units for binning (ppm or amu)", :default => DEFAULTS[:bin_unit].to_s
+  opt :search_unit, "unit for searching nearest hit (ppm or amu)", :default => DEFAULTS[:search_unit].to_s
+  opt :top_n_peaks, "the number of highest intensity peaks to query the DB with", :default => 1000
+  opt :display_n, "the number of best hits to display", :default => 20
+  text ""
+  text "modifications (at least 1 charged mod is required):"
+  opt :lithium, "search for lithium adducts"
+  opt :ammonium, "search for ammonium adducts"
+  opt :proton_gain, "search for proton gain"
+  opt :proton_loss, "search for proton loss"
+  opt :water_loss, "*all* mods are also considered with water loss"
+  opt :decoy, "search with an equal number of decoy modifications"
+  opt :verbose, "talk about it"
+end
+opts = parser.parse(ARGV)
+opts[:bin_unit] = opts[:bin_unit].to_sym
+opts[:search_unit] = opts[:search_unit].to_sym
+if ARGV.size < 2
+  parser.educate
+  exit
+end
+CHARGED_MODS = [:lithium, :ammonium, :proton_gain, :proton_loss]
+unless CHARGED_MODS.any? {|key| opts[key] }
+  puts "*" * 78
+  puts "ArgumentError: need at least one charged mod!"
+  puts "*" * 78
+  parser.educate
+  exit
+end
+(lipidmaps, *files) = ARGV
+$VERBOSE = opts[:verbose]
+MSLM = MS::Lipid::Modification
+mods = {
+  proton_gain: MSLM.new(:proton),
+  water_loss: MSLM.new(:water, :loss => true),
+  lithium: MSLM.new(:lithium),
+  ammonium: MSLM.new(:ammonium),
+  proton_loss: MS::Lipid::Modification.new(:proton, :loss => true, :charge => -1)
+}
+lipids = MS::LipidMaps.parse_file(lipidmaps)
+ions = []
+lipids.each do |lipid|
+  CHARGED_MODS.each do |key|
+    if opts[key]
+      ions << MS::Lipid::Ion.new(lipid, [mods[key]])
+      if opts[:water_loss]
+        ions << MS::Lipid::Ion.new(lipid, [mods[key], mods[:water_loss]])
+      end
+    end
+  end
+ end
+searcher = MS::Lipid::Search.new(ions, :ppm => (opts[:search_unit] == :ppm))
+if opts[:decoy]
+  # assumes a mod group that is either the mod or a mod and water loss
+  decoy_ions = ions.map do |ion|
+    # modify the first mod and leave the second untouched (if any)
+    mod_group = ion.modifications
+    fake_mod = mod_group.first.dup
+    fake_mod.massdiff *= DECOY_MODULATOR
+    fake_mod.formula = "FAKE#{mod_group.first.formula}(#{fake_mod.massdiff})"
+    fake_mod.name = "fake_#{mod_group.first.name}".to_sym
+    new_mod_group = [fake_mod, *mod_group[1..-1]]
+    MS::Lipid::Ion.new(ion.lipid, new_mod_group)
+  end
+  decoy_searcher = MS::Lipid::Search.new(decoy_ions, :ppm => (opts[:search_unit] == :ppm))
+end
+files.each do |file|
+  base = file.chomp(File.extname(file))
+  puts "processing file: #{file}" if $VERBOSE
+  sample = Sample.new(file, opts)
+  num_points = sample.spectrum.mzs.size
+  puts "#{num_points} merged peaks in #{file}" if $VERBOSE
+  highest_points = sample.spectrum.points.sort_by(&:last).reverse[0,opts[:top_n_peaks]].sort
+  sample.spectrum = MS::Spectrum.from_points( highest_points )
+  queries = sample.spectrum.mzs.each_with_index.map {|mz,index| MS::Lipid::Search::Query.new(mz, index) }
+  hit_groups = searcher.search(queries, :return_order => :sorted)
+  if opts[:decoy]
+    decoy_hit_groups = decoy_searcher.search(queries, :return_order => :sorted)
+    hit_group_qvalue_pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(hit_groups, decoy_hit_groups, :monotonic => true, &:pvalue)
+    hit_group_qvalue_pairs.each do |hit_group, qval|
+      hit_group.first.decoy_qvalue = qval
+    end
+  end
+  # all info is relative to the hit_group
+  info = {
+    decoy_qvalue: :decoy_qvalue.to_proc,
+    qvalue:  :qvalue.to_proc,
+    pvalue:  :pvalue.to_proc,
+    observed_mz:  :observed_mz.to_proc,
+    theoretical_mz:  :theoretical_mz.to_proc,
+    delta:  :delta.to_proc,
+    ppm:  :ppm.to_proc,
+    hit2_ppm: proc {|hg| hg[1].ppm },
+    first_isobar_name: proc {|hg| (lipid=hg.first.db_isobar_group.first.lipid).common_name || lipid.systematic_name },
+    num_isobars: proc {|hg| hg.first.db_isobar_group.size },
+    ions: proc {|hg|
+      hg.first.db_isobar_group.map do |ion|
+        [ion.lipid.lm_id, ion.modifications.map do |mod|
+          (mod.gain? ? '+' : '-') + "(#{mod.charged_formula})"
+        end.join
+        ].join(":")
+      end.join(' ')
+    }
+  }
+  output = base + ext
+  puts "writing to #{output}" if $VERBOSE
+  File.open(output, 'w') do |out|
+    out.puts info.keys.join("\t")
+    hit_groups[0,opts[:display_n]].each do |hit_group|
+      out.puts info.values.map {|prc| prc.call(hit_group) }.join("\t")
+    end
+  end
+  if opts[:decoy]
+    decoy_output = base + '.decoy' + ext
+    File.open(decoy_output, 'w') do |dout|
+      decoy_info = info.dup
+      [:qvalue, :decoy_qvalue].each {|key| decoy_info.delete(key) }
+      dout.puts decoy_info.keys.join("\t")
+      decoy_hit_groups[0,opts[:display_n]].each do |hit_group|
+        dout.puts decoy_info.values.map {|prc| prc.call(hit_group) }.join("\t")
+      end
+    end
+  end
+end
+=end

data/lib/mspire/lipid.rb ADDED

@@ -0,0 +1,19 @@
module Mspire
  # A lipid record: a plain value holder whose attributes are listed by
  # Lipid.members and filled positionally by the constructor.
  class Lipid
    # Attribute names, in the positional order expected by #initialize.
    def self.members
      [:lm_id,:common_name,:systematic_name,:formula,:mass,:category,:main_class,:sub_class,:pubchem_id,:inchi_key,:kegg_id,:chebi_id,:structure]
    end

    members.each {|mem| attr_accessor mem }

    # Takes the attribute values in the same order as Lipid.members. Missing
    # trailing values are left nil; surplus values are ignored.
    def initialize(*args)
      # Assign through the member list so every instance variable matches its
      # accessor (a hand-written list previously stored the ninth value in
      # @pubchem_sid, leaving #pubchem_id permanently nil).
      Lipid.members.zip(args) do |name, value|
        instance_variable_set("@#{name}", value)
      end
    end

    # Short description: id, formula, mass and the common name cut to 20
    # characters (followed by "..." when it was longer).
    def inspect
      name = common_name.to_s # common_name may be nil
      cut_common_name = (name.size <= 20) ? name : (name[0,20]+"...")
      "<#{lm_id}: #{formula}: #{mass} #{cut_common_name}>"
    end
  end
end

data/lib/mspire/lipid/ion.rb ADDED

@@ -0,0 +1,71 @@
+require 'mspire/lipid/ion/fragment'
+require 'mspire/molecular_formula'
module Mspire
  class Lipid
    # a lipid with modifications (typically the mods give it a charge so that
    # it can be seen in the mass spec)
    class Ion
      # the lipid this ion is built from; must respond to #mass and #formula
      attr_accessor :lipid
      # an Array of modification objects; each must respond to #charge and
      # #massdiff (and to #gain? and #formula when #formula is called)
      attr_accessor :modifications

      # lipid:: the unmodified lipid
      # mods::  the modifications applied to it (defaults to none)
      def initialize(lipid, mods=[])
        @lipid = lipid
        @modifications = mods
        @mz = nil
      end

      # the net charge: the sum of the charges of all modifications
      def charge
        @modifications.inject(0) {|z, mod| z + mod.charge }
      end

      # a MolecularFormula object: the lipid's formula with each modification's
      # formula added (when mod.gain? is true) or subtracted (otherwise)
      def formula
        _formula = @lipid.formula
        _formula = Mspire::MolecularFormula.from_any(_formula) unless _formula.is_a?(Mspire::MolecularFormula)
        modifications.each do |mod|
          if mod.gain?
            _formula += mod.formula
          else
            _formula -= mod.formula
          end
        end
        _formula
      end

      # the signed m/z value: (lipid mass + all massdiffs) / net charge, or nil
      # when the net charge is zero. A non-nil value is cached, so later
      # changes to lipid or modifications are not reflected.
      def mz_signed
        return @mz if @mz
        total_mass = @modifications.inject(@lipid.mass) {|mass, mod| mass + mod.massdiff }
        net_charge = charge
        @mz = (net_charge == 0) ? nil : (total_mass / net_charge)
      end

      # the unsigned m/z value (nil when the ion carries no net charge)
      def mz
        _mz_signed = mz_signed
        # guard the neutral case: calling a comparison on nil would raise
        _mz_signed && _mz_signed.abs
      end

      def inspect
        "<|| Ion mz=#{mz} #{lipid.inspect} + #{modifications.map(&:inspect).join(', ')} ||>"
      end
    end
  end
end

data/lib/mspire/lipid/ion/fragment.rb ADDED

@@ -0,0 +1,68 @@
module Mspire
  class Lipid
    # Maps a carbon-chain name stem to its chain length. The literal entries
    # cover 1 to 29; the entries for 30 to 99 are generated below.
    CHAIN_PREFIXES = {
      'meth' => 1,
      'eth' => 2,
      'prop' => 3,
      'but' => 4,
      'pent' => 5,
      'hex' => 6,
      'hept' => 7,
      'oct' => 8,
      'non' => 9,
      'dec' => 10,
      'undec' => 11,
      'dodec' => 12,
      'tridec' => 13,
      'tetradec' => 14,
      'pentadec' => 15,
      'hexadec' => 16,
      'heptadec' => 17,
      'octadec' => 18,
      'nonadec' => 19,
      'eicos' => 20,
      'heneicos' => 21,
      'docos' => 22,
      'tricos' => 23,
      'tetracos' => 24,
      'pentacos' => 25,
      'hexacos' => 26,
      'heptacos' => 27,
      'octacos' => 28,
      'nonacos' => 29
    }

    # prefix contributed by the ones digit
    ones_prefixes = {
      0 => '',
      1 => 'hen',
      2 => 'do',
      3 => 'tri',
      4 => 'tetra',
      5 => 'penta',
      6 => 'hexa',
      7 => 'hepta',
      8 => 'octa',
      9 => 'nona',
    }

    # prefix contributed by the tens digit; 30 is the irregular one
    # ("triacont", not "tricont"), so it cannot share the ones-digit table
    tens_prefixes = {
      3 => 'tria',
      4 => 'tetra',
      5 => 'penta',
      6 => 'hexa',
      7 => 'hepta',
      8 => 'octa',
      9 => 'nona',
    }

    # e.g. 'triacont' => 30, 'hentriacont' => 31, 'dotetracont' => 42
    tens_prefixes.each do |tens_place, tens_prefix|
      ones_prefixes.each do |ones_place, ones_prefix|
        CHAIN_PREFIXES["#{ones_prefix}#{tens_prefix}cont"] = 10*tens_place + ones_place
      end
    end

    class Ion
      module Fragment
        # predicts the MS/MS fragments for this ion
        # (not implemented yet: the body is empty, so this returns nil)
        def predict_fragment_mzs
        end
      end
      include Fragment
    end
  end
end