RubyGems - ms-in_silico - Versions diffs - 0.1.0 - Mend

ms-in_silico 0.1.0

Files changed (10) hide show

data/MIT-LICENSE +19 -0
data/README +55 -0
data/lib/ms/in_silico/digest.rb +35 -0
data/lib/ms/in_silico/digester.rb +263 -0
data/lib/ms/in_silico/fragment.rb +74 -0
data/lib/ms/in_silico/spectrum.rb +450 -0
data/lib/ms/in_silico.rb +4 -0
data/tap.yml +0 -0
data/test/tap_test_suite.rb +5 -0
metadata +80 -0

data/MIT-LICENSE ADDED Viewed

@@ -0,0 +1,19 @@
+Copyright (c) 2008, Regents of the University of Colorado.
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.

data/README ADDED Viewed

@@ -0,0 +1,55 @@
+= {Ms-InSilico}[http://mspire.rubyforge.org/projects/ms-in_silico]
+An {Mspire}[http://mspire.rubyforge.org] library supporting in-silico calculations for mass spec data.
+== Description
+Ms-InSilico provides the following modules:
+* Ms::InSilico::Digester (protein digestion)
+* Ms::InSilico::Spectrum (peptide fragmentation)
+Corresponding Tap[http://tap.rubyforge.org] tasks are also provided.
+* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
+* Github[http://github.com/bahuvrihi/ms-in_silico/tree/master]
+* {Google Group}[http://groups.google.com/group/mspire-forum]
+== Usage
+  require 'ms/in_silico/digester'
+  require 'ms/in_silico/spectrum'
+  include Ms::InSilico
+  trypsin = Digester['Trypsin']
+  peptides = trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
+  # => [
+  # 'MIVIGR',
+  # 'SIVHPYITNEYEPFAAEK',
+  # 'QQILSIMAG']
+  spectrum = Spectrum.new(peptides[0])
+  spectrum.parent_ion_mass
+  # => 688.417442373391
+  spectrum.series('b')
+  # => [
+  # 132.047761058391,
+  # 245.131825038791,
+  # 344.200238954991,
+  # 457.284302935391,
+  # 514.305766658991,
+  # 670.406877687091]
+== Installation
+Ms-InSilico is available as a gem on RubyForge[http://rubyforge.org/projects/mspire].  Use:
+  % gem install ms-in_silico
+== Info
+Copyright (c) 2006-2008, Regents of the University of Colorado.
+Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
+Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
+Licence:: {MIT-Style}[link:files/MIT-LICENSE.html]

data/lib/ms/in_silico/digest.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require 'ms/in_silico/digester'
+module Ms
+  module InSilico
+    # Ms::InSilico::Digest::manifest digest a protein sequence into peptides
+    # Digest a protein sequence into an array of peptides.
+    #
+    #   % rap digest MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG --+ dump --no-audit
+    #     I[14:37:55]             digest MIVIGRSIVHP... to 3 peptides
+    #   # date: 2008-09-15 14:37:55
+    #   ---
+    #   ms/in_silico/digest (23483900):
+    #   - - MIVIGR
+    #     - SIVHPYITNEYEPFAAEK
+    #     - QQILSIMAG
+    #
+    class Digest < Tap::Task
+      config :digester, 'Trypsin'              # the name of the digester
+      config :max_misses, 0, &c.integer        # the max # of missed cleavage sites
+      config :site_digest, false, &c.boolean   # digest to sites (rather than sequences)
+      def process(sequence)
+        unless d = Digester[digester]
+          raise ArgumentError, "unknown digester: #{digester}"
+        end
+        peptides = site_digest ? d.site_digest(sequence, max_misses): d.digest(sequence, max_misses)
+        log 'digest', "#{sequence[0..10]}#{sequence.length > 10 ? '...' : ''} to #{peptides.length} peptides"
+        peptides
+      end
+    end
+  end
+end

data/lib/ms/in_silico/digester.rb ADDED Viewed

@@ -0,0 +1,263 @@
+require 'constants/library'
+require 'strscan'
+module Ms
+  module InSilico
+    # Digester splits a protein sequence into peptides at sites specified
+    # during initialization; in short Digester models a cleavage enzyme.
+    # Digesters support missed cleavage sites, and can return either the
+    # peptide strings or the cleavage sites.
+    #
+    # Digester includes {Constants::Library}[http://bioactive.rubyforge.org/constants/classes/Constants/Library.html],
+    # allowing access to many common digesters using Digester[]:
+    #
+    #   trypsin = Digester['Trypsin']
+    #   trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
+    #   # => [
+    #   # 'MIVIGR',
+    #   # 'SIVHPYITNEYEPFAAEK',
+    #   # 'QQILSIMAG']
+    #
+    #   trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1)
+    #   # => [
+    #   # 'MIVIGR',
+    #   # 'MIVIGRSIVHPYITNEYEPFAAEK',
+    #   # 'SIVHPYITNEYEPFAAEK',
+    #   # 'SIVHPYITNEYEPFAAEKQQILSIMAG',
+    #   # 'QQILSIMAG'
+    #   # ]
+    #
+    #   trypsin.site_digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG', 1)
+    #   # => [
+    #   # [0,6],
+    #   # [0,24],
+    #   # [6,24],
+    #   # [6,33],
+    #   # [24,33]
+    #   # ]
+    #
+    # ==== Enzymes
+    #
+    # Enzymes in the library were adapted from the default Mascot[http://www.matrixscience.com/]
+    # enzyme list. Currently supported enzymes include:
+    #
+    # * Arg-C
+    # * Asp-N
+    # * Asp-N_ambic
+    # * Chymotrypsin
+    # * CNBr
+    # * Lys-C
+    # * Lys-C/P
+    # * PepsinA
+    # * Tryp-CNBr
+    # * TrypChymo
+    # * Trypsin/P
+    # * V8-DE
+    # * V8-E
+    # * Trypsin
+    # * V8-E+Trypsin
+    # * V8-DE+Trypsin
+    #
+    # Several enzymes require two or more digesters, or functionality that
+    # is not provided by Digester, and so remain unsupported:
+    #
+    # * CNBr+Trypsin
+    # * Formic_acid
+    # * LysC+AspN
+    # * semiTrypsin
+    #
+    class Digester
+      # The name of the digester
+      attr_reader :name
+      # A string of residues at which cleavage occurs
+      attr_reader :cleave_str
+      # A c-terminal restriction residue which prevents
+      # cleavage at a potential cleavage site (optional).
+      attr_reader :cterm_exception
+      # True if cleavage occurs at the c-terminus of a
+      # cleavage residue, false if cleavage occurs at
+      # the n-terminus.
+      attr_reader :cterm_cleavage
+      # a multiline whitespace regexp
+      WHITESPACE = /\s*/m
+      def initialize(name, cleave_str, cterm_exception=nil, cterm_cleavage=true)
+        regexp = []
+        0.upto(cleave_str.length - 1) {|i| regexp << cleave_str[i, 1] }
+        @name = name
+        @cleave_str = cleave_str
+        @cleave_regexp = Regexp.new(regexp.join('|'))
+        @cterm_exception = case
+        when cterm_exception == nil || cterm_exception.empty? then nil
+        when cterm_exception.length == 1 then cterm_exception[0]
+        else
+          raise ArgumentError, "cterm exceptions must be a single residue: #{cterm_exception}"
+        end
+        @cterm_cleavage = cterm_cleavage
+        @scanner = StringScanner.new('')
+      end
+      # Returns the digestion sites in sequence, as determined by
+      # the cleave_regexp boundaries.  The digestion sites correspond
+      # to the positions where a peptide begins and ends, such that
+      # [sites[n], sites[n+1] - sites[n]] corresponds to the [index, length] for peptide n.
+      #
+      #   d = Digester.new('Trypsin', 'KR', 'P')
+      #   seq = "AARGGR"
+      #   sites = d.cleavage_sites(seq)                 # => [0, 3, 6]
+      #
+      #   seq[sites[0], sites[0+1] - sites[0]]          # => "AAR"
+      #   seq[sites[1], sites[1+1] - sites[1]]          # => "GGR"
+      #
+      # Trailing whitespace is included in the fragment.
+      #
+      #   seq = "AAR  \n  GGR"
+      #   sites = d.cleavage_sites(seq)                 # => [0, 8, 11]
+      #
+      #   seq[sites[0], sites[0+1] - sites[0]]          # => "AAR  \n  "
+      #   seq[sites[1], sites[1+1] - sites[1]]          # => "GGR"
+      #
+      # The digested section of sequence may be specified using offset
+      # and length.
+      def cleavage_sites(seq, offset=0, length=seq.length-offset)
+        adjustment = cterm_cleavage ? 0 : 1
+        limit = offset + length
+        positions = [offset]
+        pos = scan(seq, offset, limit) do |pos|
+          positions << pos - adjustment
+        end
+        # add the final position
+        if pos < limit || positions.length == 1
+          positions << limit
+        end
+        positions
+      end
+      # Returns digestion sites of sequence as [start_index, end_index] pairs,
+      # allowing for missed cleavages.  Digestion sites are determined using
+      # cleavage_sites; as in that method, the digested section of sequence
+      # may be specified using offset and length.
+      #
+      # Each [start_index, end_index] pair is yielded to the block, if given,
+      # and the collected results are returned.
+      def site_digest(seq, max_misses=0, offset=0, length=seq.length-offset) # :yields: start_index, end_index
+        frag_sites = cleavage_sites(seq, offset, length)
+        overlay(frag_sites.length, max_misses, 1) do |start_index, end_index|
+          start_index = frag_sites[start_index]
+          end_index = frag_sites[end_index]
+          block_given? ? yield(start_index, end_index) : [start_index, end_index]
+        end
+      end
+      # Returns an array of peptides produced by digesting sequence, allowing for
+      # missed cleavage sites. Digestion sites are determined using cleavage_sites;
+      # as in that method, the digested section of sequence may be specified using
+      # offset and length.
+      def digest(seq, max_misses=0, offset=0, length=seq.length-offset)
+        site_digest(seq, max_misses, offset, length).collect do |s, e|
+          seq[s, e-s]
+        end
+      end
+      protected
+      # The cleavage regexp used to identify cleavage sites
+      attr_reader :cleave_regexp # :nodoc:
+      # The scanner used to digest strings.
+      attr_reader :scanner # :nodoc:
+      # Scans seq between offset and limit for the cleave_regexp, skipping whitespace
+      # and being mindful of exception characters. The positions of the scanner at
+      # each match are yielded to the block.
+      def scan(seq, offset, limit) # :nodoc:
+        # the digester's single StringScanner is reused: point it at seq
+        # and start searching from offset
+        scanner.string = seq
+        scanner.pos = offset
+        # each pass advances the scanner to just past the next cleavage residue
+        while scanner.search_full(cleave_regexp, true, false)
+          # ...and past any whitespace that directly follows it
+          scanner.search_full(WHITESPACE, true, false)
+          pos = scanner.pos
+          # skip if the next character is the exception character
+          next if cterm_exception != nil && seq[pos] == cterm_exception
+          # break if you scanned past the upper limit
+          break if pos > limit
+          yield pos
+        end
+        # the return value is wherever the scanner stopped, which is not
+        # necessarily a position that was yielded
+        scanner.pos
+      end
+      # Performs an overlap-collect algorithm providing the start and end
+      # indices of spans skipping up to max_misses boundaries.
+      def overlay(n, max_misses, offset) # :nodoc:
+        results = []
+        0.upto(n-1) do |start_index|
+          0.upto(max_misses) do |n_miss|
+            end_index = start_index + offset + n_miss
+            break if end_index == n
+            results << yield(start_index, end_index)
+          end
+        end
+        results
+      end
+      #
+      # Enzymes adapted from the default Mascot enzyme list.
+      #
+      class << self
+        protected
+        # Utility method to parse a mascot enzyme configuration
+        # string into a Digester.
+        def mascot_parse(str) # :nodoc:
+          name, sense, cleave_str, cterm_exception, independent, semi_specific = str.split(/ *\t */)
+          cterm_cleavage = case sense
+          when 'C-Term' then true
+          when 'N-Term' then false
+          else raise ArgumentError, "unknown sense: #{sense}"
+          end
+          new(name, cleave_str, cterm_exception, cterm_cleavage)
+        end
+      end
+      ARG_C =         mascot_parse('Arg-C 	C-Term 	R 	P 	 no 	 no')
+      ASP_N =         mascot_parse('Asp-N 	N-Term 	BD 	  	no 	no')
+      ASP_N_AMBIC =   mascot_parse('Asp-N_ambic 	N-Term 	DE 	  	no 	no')
+      CHYMOTRYPSIN =  mascot_parse('Chymotrypsin 	C-Term 	FLWY 	P 	no 	no')
+      CNBR =          mascot_parse('CNBr 	C-Term 	M 	  	no 	no')
+      LYS_C =         mascot_parse('Lys-C 	C-Term 	K 	P 	no 	no')
+      LYS_C_P =       mascot_parse('Lys-C/P 	C-Term 	K 	  	no 	no')
+      PEPSIN_A =      mascot_parse('PepsinA 	C-Term 	FL 	  	no 	no')
+      TRYP_CNBR =     mascot_parse('Tryp-CNBr 	C-Term 	KMR 	P 	no 	no')
+      TRYP_CHYMO =    mascot_parse('TrypChymo 	C-Term 	FKLRWY 	P 	no 	no')
+      TRYPSIN_P =     mascot_parse('Trypsin/P 	C-Term 	KR 	  	no 	no')
+      V8_DE =         mascot_parse('V8-DE 	C-Term 	BDEZ 	P 	no 	no')
+      V8_E =          mascot_parse('V8-E 	C-Term 	EZ 	P 	no 	no')
+      TRYPSIN =       mascot_parse('Trypsin 	C-Term	KR 	P 	no 	no')
+      V8_E_TRYPSIN =  mascot_parse('V8-E+Trypsin 	C-Term 	EKRZ 	P 	no 	no')
+      V8_DE_TRYPSIN = mascot_parse('V8-DE+Trypsin 	C-Term 	BDEKRZ 	P 	no 	no')
+      include Constants::Library
+      library.index_by_attribute :name
+    end
+  end
+end

data/lib/ms/in_silico/fragment.rb ADDED Viewed

@@ -0,0 +1,74 @@
+require 'ms/in_silico/spectrum'
+module Ms
+  module InSilico
+    # Ms::InSilico::Fragment::manifest calculates a theoretical ms/ms spectrum
+    #
+    # Calculates the parent ion mass and theoretical ms/ms spectrum for a
+    # peptide sequence.  Configurations allow the specification of one or
+    # more fragmentation series to include, as well as charge, and intensity.
+    #
+    #   % rap fragment TVQQEL --+ dump --no-audit
+    #   # date: 2008-09-15 14:37:55
+    #   ---
+    #   ms/in_silico/fragment (:...:):
+    #   - - 717.377745628191
+    #     - - 102.054954926291
+    #       - 132.101905118891
+    #       - 201.123368842491
+    #       - 261.144498215091
+    #       - 329.181946353891
+    #       - 389.203075726491
+    #       - 457.240523865291
+    #       - 517.261653237891
+    #       - 586.283116961491
+    #       - 616.330067154091
+    #       - 699.367180941891
+    #       - 717.377745628191
+    #
+    # In the output, the parent ion mass is given first, followed by an
+    # array of the sorted fragmentation data.
+    class Fragment < Tap::Task
+      # A block that validates a config input, parsing it into an
+      # EmpiricalFormula unless it already is one.
+      MOLECULE = lambda do |value|
+        case value
+        when Molecules::EmpiricalFormula then value
+        else Molecules::EmpiricalFormula.parse(value)
+        end
+      end
+      config :series, ['y', 'b'], &c.array   # a list of the series to include
+      config :charge, 1, &c.integer          # the charge for the parent ion
+      config :intensity, nil, &c.num_or_nil  # a uniform intensity value
+      config :nterm, 'H', &MOLECULE          # the n-terminal modification
+      config :cterm, 'OH', &MOLECULE         # the c-terminal modification
+      config :sort, true, &c.switch          # sorts the data by mass
+      config :unmask, true, &c.switch        # remove masked (negative) masses
+      def process(peptide)
+        log :fragment, peptide
+        spec = spectrum(peptide)
+        masses = []
+        series.each {|s| masses.concat(spec.series(s)) }
+        masses.delete_if {|m| m < 0 } if unmask
+        masses.sort! if sort
+        masses.collect! {|m| [m, intensity] } if intensity
+        [spec.parent_ion_mass(charge), masses]
+      end
+      protected
+      # Returns a new Spectrum used in the calculation.
+      # Primarily a hook for custom spectra in subclasses.
+      def spectrum(peptide)
+        Spectrum.new(peptide, nterm, cterm)
+      end
+    end
+  end
+end

data/lib/ms/in_silico/spectrum.rb ADDED Viewed

@@ -0,0 +1,450 @@
+require 'molecules/libraries/residue'
+require 'constants/libraries/particle'
+require 'ms/in_silico'
+module Ms
+  module InSilico
+    # Spectrum calculates the theoretical ion series produced by a fragmentation
+    # process such as collision induced dissociation (CID).  The formulae used to
+    # calculate the ion series were obtained from the {Matrix Science
+    # website}[http://www.matrixscience.com/].  Spectrum uses the
+    # {Constants}[http://bioactive.rubyforge.org/constants/] gem as the default
+    # source of element and particle masses.
+    #
+    #   spec = Ms::InSilico::Spectrum.new('TVQQEL')
+    #   spec.series('b')
+    #   # => [
+    #   # 102.054954926291,
+    #   # 201.123368842491,
+    #   # 329.181946353891,
+    #   # 457.240523865291,
+    #   # 586.283116961491,
+    #   # 699.367180941891]
+    #
+    #   spec.series('y')
+    #   # => [
+    #   # 717.377745628191,
+    #   # 616.330067154091,
+    #   # 517.261653237891,
+    #   # 389.203075726491,
+    #   # 261.144498215091,
+    #   # 132.101905118891]
+    #
+    # ==== Formulae to Calculate Fragment Ion m/z values
+    #
+    # <em>Copied directly from the Matrix Science {fragmentation help
+    # section}[http://www.matrixscience.com/help/fragmentation_help.html]</em>
+    #
+    #   [N] is the molecular mass of the neutral N-terminal group, [C] is the
+    #   molecular mass of the neutral C-terminal group, [M] is molecular mass
+    #   of the neutral amino acid residues. To obtain m/z values, add or
+    #   subtract protons as required to obtain the required charge and divide
+    #   by the number of charges. For example, to get a+, add 1 proton to the
+    #   Mr value for a.  To get a--, subtract 2 protons from the Mr value for
+    #   a and divide by 2.
+    #
+    #    Ion Type  Neutral Mr
+    #    a         [N]+[M]-CHO
+    #    a*        a-NH3
+    #    a°        a-H2O
+    #    b         [N]+[M]-H
+    #    b*        b-NH3
+    #    b°        b-H2O
+    #    c         [N]+[M]+NH2
+    #    d         a - partial side chain
+    #    v         y - complete side chain
+    #    w         z - partial side chain
+    #    x         [C]+[M]+CO-H
+    #    y         [C]+[M]+H
+    #    y*        y-NH3
+    #    y°        y-H2O
+    #    z         [C]+[M]-NH2
+    #
+    # ==== Use of alternate masses
+    # By default a Spectrum will calculate the ion series' using the
+    # monoisotopic masses for each element.  To calculate masses
+    # differently, provide a block to new; each Element will be
+    # passed to the block as needed, and the block should return
+    # the element mass used in the calculation.
+    #
+    # Alternatively, a subclass can override the mass method; all
+    # objects that need to be turned into a mass (nterm, cterm,
+    # a variety of molecules specified as strings, the elements,
+    # ELECTRON, etc) are passed to mass to yield the value used
+    # in any given calculation.
+    #
+    #--
+    # ALL of the collections could be sped up using inline
+    #++
+    class Spectrum
+      include Molecules
+      include Molecules::Libraries
+      include Constants::Libraries
+      class << self
+        def inherited(base)
+          base.instance_variable_set(:@residues_to_locate, @residues_to_locate.dup)
+        end
+        # A string of residues located by scan.
+        attr_accessor :residues_to_locate
+        # Adds residues to residues_to_locate (these residues
+        # will be located by scan).  Generally used when some
+        # special fragmentation behavior occurs at specific
+        # residues.  By default no residues are located.
+        #
+        #   class Subclass < Spectrum
+        #     locate_residues "PS"
+        #   end
+        #
+        #   Subclass.new('RPPGFSPFR').residue_locations
+        #   # => {'P' => [1, 2, 6], 'S' => [5]}
+        #
+        # Calls to locate_residues are cumulative.
+        def locate_residues(residues)
+          @residues_to_locate += residues
+        end
+        # Scans the sequence to produce a ladder of masses and a
+        # hash of (residue, locations) pairs which indicate the
+        # indices at which the residue occurs in sequence. The
+        # ladder corresponds to the M values described above.
+        #
+        # Returns [ladder, {residue => locations}].
+        #
+        # ==== Inputs
+        # sequence:: a string
+        # masses_by_byte:: an array of masses where the index of
+        #                  the mass is the byte of the
+        #                  corresponding residue.
+        # residues_to_locate:: a string of the residues to locate.
+        #
+        # Note: scan is an optimized utility function, but should
+        # be replaced by an inline function to do the same.
+        #
+        def scan(sequence, masses_by_byte, residues_to_locate)
+          locations = []
+          residues_to_locate.each_byte {|byte| locations[byte] = []}
+          mass = 0
+          ladder = []
+          sequence.each_byte do |byte|
+            mass += masses_by_byte[byte]
+            location = locations[byte]
+            location << ladder.length if location
+            ladder << mass
+          end
+          hash = {}
+          0.upto(residues_to_locate.length-1) do |index|
+            letter = residues_to_locate[index, 1]
+            byte = letter[0]
+            hash[letter] = locations[byte]
+          end
+          [ladder, hash]
+        end
+      end
+      HYDROGEN = EmpiricalFormula.parse("H")
+      HYDROXIDE = EmpiricalFormula.parse("OH")
+      ELECTRON = Particle['Electron']
+      self.residues_to_locate = ""
+      # The peptide sequence.
+      attr_reader :sequence
+      # The n-terminal modification (default H)
+      attr_reader :nterm
+      # The c-terminal modification (default OH)
+      attr_reader :cterm
+      # An optional block used to calculate masses of molecules.
+      attr_reader :block
+      # A ladder of mass values corresponding to the
+      # M values used in the fragmentation formulae.
+      attr_reader :ladder
+      # A hash of (residue, [locations]) pairs where
+      # the locations are the indices in sequence
+      # at which residue occurs.
+      attr_reader :residue_locations
+      # Initializes a new Spectrum using the specified n- and c-terminal
+      # modifications.  Masses will be calculated using the block, if
+      # specified.  If no block is specified, then the monoisotopic
+      # masses will be used.
+      def initialize(sequence, nterm=HYDROGEN, cterm=HYDROXIDE, &block) # :yields: element
+        @sequence = sequence
+        @nterm = nterm
+        @cterm = cterm
+        @block = block
+        residue_masses = Residue.residue_index.collect do |residue|
+          next(0) if residue == nil
+          mass(residue)
+        end
+        @ladder, @residue_locations = self.class.scan(
+          sequence,
+          residue_masses,
+          self.class.residues_to_locate)
+        @series_hash = {}
+        @series_mask = {}
+      end
+      # Returns the mass of the parent ion for the sequence, given the charge.
+      def parent_ion_mass(charge=1)
+        (mass(nterm) + ladder.last + mass(cterm) + charge * proton_mass)/charge
+      end
+      # Returns the mass of a proton (ie Hydrogen minus an Electron)
+      def proton_mass
+        mass(HYDROGEN) - mass(ELECTRON)
+      end
+      # Retrieves the specified series, assuming a charge of 1.  A different charge
+      # can be specified for the series by using '+' and '-'.  For example:
+      #
+      #   f = Spectrum.new 'RPPGFSPFR'
+      #   f.series('y') ==  f.y_series                      # => true
+      #   f.series('b++') ==  f.b_series(2)                 # => true
+      #   f.series('nladder-') ==  f.nladder_series(-1)     # => true
+      #
+      # Series raises an error if the specified charge is zero.
+      def series(s)
+        s = s.to_s.strip
+        case s
+        when /^(immonium|nladder|cladder|[abcxyYz])(\+*)(-*)(\s[\+\-\s\w\d]+)?$/
+          series = $1
+          plus = $2
+          minus = $3
+          mod = $4.to_s.gsub(/\s/, "")
+          charge = case
+          when plus.empty? && minus.empty? then 1
+          when minus.empty? then plus.length
+          when plus.empty? then -minus.length
+          else
+            charge = plus.length - minus.length
+            raise ArgumentError.new("zero charge specified in series: #{s}") if charge == 0
+            charge
+          end
+          self.send("#{series}_series", charge, mod)
+        else
+          handle_unknown_series(s)
+        end
+      end
+      def immonium_series(charge=1, mod=nil)
+        get_series(:immonium, charge, mod) do
+          delta = mass(mod) - mass('CO')
+          previous = 0
+          series = []
+          ladder.each do |current|
+            series << (current - previous + delta + charge * proton_mass)/charge
+            previous = current
+          end
+          series
+        end
+      end
+      #   [N]+[M]-CHO
+      def a_series(charge=1, mod=nil)
+        get_series(:a, charge, mod) do
+          delta = mass(mod) + mass(nterm) - mass('CHO') + charge * proton_mass
+          nterm_series(delta, charge)
+        end
+      end
+      #   [N]+[M]-H
+      def b_series(charge=1, mod=nil)
+        get_series(:b, charge, mod) do
+          delta = mass(mod) + mass(nterm) - mass('H') + charge * proton_mass
+          nterm_series(delta, charge)
+        end
+      end
+      #   [N]+[M]+NH2
+      def c_series(charge=1, mod=nil)
+        get_series(:c, charge, mod) do
+          delta = mass(mod) + mass(nterm) + mass('NH2') + charge * proton_mass
+          nterm_series(delta, charge)
+        end
+      end
+      #   [M]+H2O
+      #--
+      # Ask Peter about these as well... Currently I'm adding water to
+      # cap the ends, as if a hydrolysis reaction produced the ladder,
+      # then I'm adding H for charge... is this what is intended?
+      # Why not cladder[0] or cladder[-1]?
+      #++
+      def cladder_series(charge=1, mod=nil)
+        get_series(:cladder, charge, mod) do
+          delta = mass(mod) +  mass('H2O') + charge * proton_mass
+          nterm_series(delta, charge)
+        end
+      end
+      #   [C]+[M]+CO-H
+      def x_series(charge=1, mod=nil)
+        get_series(:x, charge, mod) do
+          delta = mass(mod) + ladder.last + mass(cterm) + mass('CO - H') + charge * proton_mass
+          cterm_series(delta, charge)
+        end
+      end
+      #   [C]+[M]+H
+      def y_series(charge=1, mod=nil)
+        get_series(:y, charge, mod) do
+          delta = mass(mod) + ladder.last + mass(cterm) + mass('H') + charge * proton_mass
+          cterm_series(delta, charge)
+        end
+      end
+      #   [C]+[M]-H
+      def Y_series(charge=1, mod=nil)
+        get_series(:Y, charge, mod) do
+          delta = mass(mod) + ladder.last + mass(cterm) - mass('H') + charge * proton_mass
+          cterm_series(delta, charge)
+        end
+      end
+      #   [C]+[M]-NH2
+      def z_series(charge=1, mod=nil)
+        get_series(:z, charge, mod) do
+          delta = mass(mod) + ladder.last + mass(cterm) - mass('NH2') + charge * proton_mass
+          cterm_series(delta, charge)
+        end
+      end
+      #   [M]+H2O
+      #--
+      # Ask Peter about these as well... Currently I'm adding water to
+      # cap the ends, as if a hydrolysis reaction produced the ladder,
+      # then I'm adding H for charge... is this what is intended?
+      # Why not nladder[-1]?
+      #++
+      def nladder_series(charge=1, mod=nil)
+        get_series(:nladder, charge, mod) do
+          delta = mass(mod) + ladder.last + mass('H2O') + charge * proton_mass
+          cterm_series(delta, charge)
+        end
+      end
+      protected
+      # A hash holding all calculated series for self.  Series are keyed
+      # by the type and charge of the series (ex: b1, b2, y1, y2).
+      attr_accessor :series_hash
+      # A hash holding the locations of residues that need to be masked (ie
+      # multiplied by -1) in a given series.  Mask locations should be unique
+      # so that a given location will not be masked twice; the method
+      # mask_locations can assist in doing so.  Series masks are keyed
+      # by the series type (ex: b, y).
+      attr_accessor :series_mask
+      # Calculates the mass of the molecule for a variety of input
+      # types:
+      #
+      #   EmpiricalFormula   molecule.mass(&block)
+      #   Particle           molecule.mass
+      #   String             EmpiricalFormula.mass(molecule, &block)
+      #   Numeric            molecule
+      #   nil                0
+      #
+      def mass(molecule)
+        # note that Particles will not actually make use of the
+        # block, even though it is being passed to it.
+        case molecule
+        when EmpiricalFormula, Particle then molecule.mass(&block)
+        when String then EmpiricalFormula.mass(molecule, &block)
+        when nil then 0
+        when Numeric then molecule
+        else
+          raise "cannot calculate mass of: #{molecule}"
+        end
+      end
+      # Generates an n-terminal series (ex: a, b, or c) by adding delta
+      # to each element from ladder, and dividing by charge.  Delta,
+      # therefore, should ALREADY take account of the protons added
+      # by charge.
+      def nterm_series(delta, charge)
+        ladder.collect {|m| (m + delta)/charge }
+      end
+      # Generates a c-terminal series (ex: x, y, or z) by subtracting each
+      # element from ladder from delta, and dividing by charge.  Delta,
+      # therefore, should ALREADY take account of the protons added
+      # by charge.
+      def cterm_series(delta, charge)
+        series = ladder.collect {|m| (delta - m)/charge }
+        series.unshift(delta/charge)
+        series.pop
+        series
+      end
+      # Adds the specified locations to the series mask, ensuring that the
+      # specified locations will be unique within the mask.  If overwrite
+      # is true, then the input locations will overwrite any existing mask
+      # locations.
+      def mask_locations(series, locations, overwrite=false)
+        locations = locations.collect do |location|
+          location < 0 ? ladder.length + location : location
+        end
+        if overwrite
+          series_mask[series] = locations.uniq
+        else
+          (series_mask[series] ||= []).concat(locations).uniq!
+        end
+      end
+      # Retrieves the series keyed by "#{key}#{charge}#{mod}" in series_hash.
+      # If the series has not been initialized, the series will be
+      # initialized using the supplied block, and masked using the
+      # series_masks indicated by key and by "#{key}#{mod}" (the charge
+      # is not part of the mask key).
+      def get_series(key, charge=nil, mod=nil)
+        series_hash["#{key}#{charge}#{mod}"] ||= mask(yield, key, mod)
+      end
+      # Mask the locations in the series by multiplying them by -1.  Mask
+      # does NOT check to see if the location is negative or positive.
+      def mask(series, key, mod)
+        locations = series_mask[key]
+        unless mod == nil
+          mod_locations = series_mask["#{key}#{mod}"]
+          if mod_locations
+            locations += mod_locations
+            locations.uniq!
+          end
+        end
+        locations.each {|i| series[i] *= -1} unless locations == nil
+        series
+      end
+      # Hook to custom-handle an unknown series from the series method.
+      # By default, handle_unknown_series raises an ArgumentError.
+      def handle_unknown_series(s)
+        raise ArgumentError, "unknown series: #{s}"
+      end
+    end
+  end
+end

data/lib/ms/in_silico.rb ADDED Viewed

@@ -0,0 +1,4 @@
+module Ms
+  module InSilico
+  end
+end

data/tap.yml ADDED Viewed

File without changes

data/test/tap_test_suite.rb ADDED Viewed

@@ -0,0 +1,5 @@
+$:.unshift File.join(File.dirname(__FILE__), '../lib')
+# runs all subsets (see Tap::Test::SubsetMethods)
+ENV["ALL"] = "true"
+Dir.glob("./**/*_test.rb").each {|test| require test}

metadata ADDED Viewed

@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: ms-in_silico
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Simon Chiang
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2008-11-20 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: tap
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0.11"
+    version:
+- !ruby/object:Gem::Dependency
+  name: molecules
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+    version:
+description:
+email: simon.a.chiang@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- README
+- MIT-LICENSE
+files:
+- lib/ms/in_silico.rb
+- lib/ms/in_silico/digest.rb
+- lib/ms/in_silico/digester.rb
+- lib/ms/in_silico/fragment.rb
+- lib/ms/in_silico/spectrum.rb
+- tap.yml
+- README
+- MIT-LICENSE
+has_rdoc: true
+homepage: http://mspire.rubyforge.org/projects/ms-in_silico/
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: mspire
+rubygems_version: 1.3.0
+signing_key:
+specification_version: 2
+summary: ms-in_silico task library
+test_files:
+- test/tap_test_suite.rb