RubyGems - ms-ident - Versions diffs - 0.0.3 → 0.0.17 - Mend

ms-ident 0.0.3 → 0.0.17

This diff compares the contents of two publicly available versions of this package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (17)

data/VERSION +1 -1
data/lib/ms/ident/peptide.rb +75 -0
data/lib/ms/ident/pepxml/modifications.rb +0 -11
data/lib/ms/ident/pepxml/msms_run_summary.rb +1 -0
data/lib/ms/ident/pepxml/sample_enzyme.rb +16 -17
data/lib/ms/ident/pepxml/search_database.rb +11 -4
data/lib/ms/ident/pepxml/search_hit/modification_info.rb +7 -10
data/lib/ms/ident/pepxml/search_result.rb +8 -1
data/lib/ms/ident/pepxml/search_summary.rb +7 -3
data/lib/ms/ident/pepxml/spectrum_query.rb +2 -0
data/lib/ms/ident/pepxml.rb +35 -8
data/lib/ms/ident/protein.rb +17 -0
data/lib/ms/ident/search.rb +105 -0
data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +80 -80
data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
metadata +9 -5
data/lib/ms/ident/pepxml/modifications/sequest.rb +0 -237

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.3
1	+ 0.0.17

data/lib/ms/ident/peptide.rb ADDED Viewed

@@ -0,0 +1,75 @@
+module Ms ; end
+module Ms::Ident ; end
+# A 'sequence' is a notation of a peptide that includes the leading and
+# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
+# and may contain post-translational modification information.
+#
+# 'aaseq' is the amino acid sequence of just the peptide with no leading or
+# trailing notation (e.g., PEPTIDER or LAKKLY)
+module Ms::Ident::Peptide
+  Nonstandard_AA_re = /[^A-Z\.\-]/
+  class << self
+    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
+    # amino acids, and returns the center piece
+    def sequence_to_aaseq(sequence)
+      after_removed = remove_non_amino_acids(sequence)
+      pieces = after_removed.split('.')
+      case pieces.size
+      when 3
+        pieces[1]
+      when 2
+        if pieces[0].size > 1  ## N termini
+          pieces[0]
+        else  ## C termini
+          pieces[1]
+        end
+      when 1  ## this must be a parse error!
+        pieces[0] ## which is the peptide itself
+      else
+        abort "bad peptide sequence: #{sequence.inspect}"
+      end
+    end
+    # removes non standard amino acids specified by Nonstandard_AA_re
+    def remove_non_amino_acids(sequence)
+      sequence.gsub(Nonstandard_AA_re, '')
+    end
+    # remove non amino acids and split the sequence
+    def prepare_sequence(sequence)
+      nv = remove_non_amino_acids(sequence)
+      split_sequence(nv)
+    end
+    # Returns prev, peptide, next from sequence.  Parse errors return
+    # nil,nil,nil
+    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
+    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
+    #   PEPTIDE.A    # -> -, PEPTIDE, A
+    #   A.PEPTIDE    # -> A, PEPTIDE, -
+    #   PEPTIDE      # -> nil,nil,nil
+    def split_sequence(sequence)
+      pieces = sequence.split('.')
+      case pieces.size
+      when 3
+        pieces
+      when 2
+        if pieces[0].size > 1  ## N termini
+          ['-', pieces[0], pieces[1]]
+        else  ## C termini
+          [pieces[0], pieces[1], '-']
+        end
+      when 1  ## this must be a parse error!
+        [nil,nil,nil]
+      when 0
+        [nil,nil,nil]
+      end
+    end
+  end
+end

data/lib/ms/ident/pepxml/modifications.rb CHANGED Viewed

@@ -5,17 +5,6 @@ module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
-# holds a list of AminoacidModification and TerminalModification objects.
-class Ms::Ident::Pepxml::Modifications < Array
-  ## Generates the pepxml for static and differential amino acid mods based on
-  ## sequest object
-  def to_xml(builder=nil)
-    xmlb = builder || Nokogiri::XML::Builder.new
-    self.each {|mod| mod.to_xml(xmlb) }
-    builder || xmlb.doc.root.to_xml
-  end
-end
 # Modified aminoacid, static or variable
 # unless otherwise stated, all attributes can be anything
 class Ms::Ident::Pepxml::AminoacidModification

data/lib/ms/ident/pepxml/msms_run_summary.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require 'nokogiri'
 require 'ms/ident/pepxml/sample_enzyme'
 require 'ms/ident/pepxml/search_summary'
+require 'ms/ident/pepxml/spectrum_query'
 module Ms ; end
 module Ms::Ident ; end

data/lib/ms/ident/pepxml/sample_enzyme.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 require 'merge'
+require 'strscan'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
@@ -56,21 +58,11 @@ class Ms::Ident::Pepxml::SampleEnzyme
   def self.from_pepxml_node(node)
     self.new.from_pepxml_node(node)
   end
-end
-###################################################
-###################################################
-###################################################
-###################################################
-# This is digestion methodology:
-=begin
-require 'strscan'
-  # takes an amino acid sequence (e.g., -.PEPTIDK.L)
+  # takes an amino acid sequence (e.g. PEPTIDE).
   # returns the number of missed cleavages
   def num_missed_cleavages(aaseq)
+    seq_to_scan = '  ' + aaseq + '  '
     raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
     @num_missed_cleavages_regex =
       if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
@@ -89,23 +81,30 @@ require 'strscan'
     num
   end
-  # requires full sequence (with heads and tails)
-  def num_tol_term(sequence)
+  # No arguments should contain non-standard amino acids
+  def num_tol_term(prev_aa, middle, next_aa)
     raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
     no_cut = @no_cut || ''
     num_tol = 0
-    first, middle, last = SpecID::Pep.split_sequence(sequence)
     last_of_middle = middle[-1,1]
     first_of_middle = middle[0,1]
-    if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
+    if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
       num_tol += 1
     end
-    if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
+    if @cut.include?(last_of_middle) && !no_cut.include?(next_aa) || next_aa == '-'
       num_tol += 1
     end
     num_tol
   end
+end
+###################################################
+###################################################
+###################################################
+###################################################
+# This is digestion methodology:
+=begin
   # returns all peptides of missed cleavages <= 'missed_cleavages'
   # so 2 missed cleavages will return all no missed cleavage peptides
   # all 1 missed cleavages and all 2 missed cleavages.

data/lib/ms/ident/pepxml/search_database.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'ms/fasta'
 require 'merge'
 module Ms ; end
 module Ms::Ident ; end
@@ -25,13 +26,19 @@ class Ms::Ident::Pepxml
     def initialize(hash={}, get_size_of_residues=false)
       merge!(hash)
       if get_size_of_residues && File.exist?(@local_path)
-        @size_of_residues = 0
-        Ms::Fasta.foreach(@local_path) do |entry|
-          @size_of_residues += entry.sequence.size
-        end
+        set_size_of_residues!
       end
     end
+    # returns self for chaining
+    def set_size_of_residues!
+      @size_of_residues = 0
+      Ms::Fasta.foreach(@local_path) do |entry|
+        @size_of_residues += entry.sequence.size
+      end
+      self
+    end
     def to_xml(builder)
       attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
       builder.search_database(Hash[attrs])

data/lib/ms/ident/pepxml/search_hit/modification_info.rb CHANGED Viewed

@@ -31,16 +31,11 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
   # position ranges from 1 to peptide length
   #attr_accessor :mod_aminoacid_masses
-  class << self
-    alias_method :old_new, :new
-    # takes either a hash or the normal list of values to set.
-    def new(*args)
-      if args.first.is_a?(Hash)
-        args = args.first.values_at(*members)
-      end
-      obj = old_new(*args)
-      obj
+  def initialize(*args)
+    if args.first.is_a?(Hash)
+      args = args.first.values_at(*members)
     end
+    super(*args)
   end
   # Will escape any xml special chars in modified_peptide
@@ -50,11 +45,12 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
     ## Create the attribute string:
     atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
     atts.map! {|at| (v=send(at)) && [at, v] }.compact
-    xmlb.modification_info(Hash[atts]) do
+    xmlb.modification_info(Hash[atts]) do |xmlb|
       mod_aminoacid_masses.andand.each do |mod_aa_mass|
         mod_aa_mass.to_xml(xmlb)
       end
     end
+    builder || xmlb.doc.root.to_s
   end
   def self.from_pepxml_node(node)
@@ -78,5 +74,6 @@ end
 Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
   def to_xml(builder)
     builder.mod_aminoacid_mass(:position => position, :mass => mass)
+    builder
   end
 end

data/lib/ms/ident/pepxml/search_result.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 require 'nokogiri'
+require 'ms/ident/pepxml/search_hit'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
@@ -8,9 +10,14 @@ class Ms::Ident::Pepxml::SearchResult
   # an array of search_hits
   attr_accessor :search_hits
-  # if block given, then yields an empty search_hits array
+  # if block given, then yields an empty search_hits array.
+  # For consistency with other objects, will also take a hash that has the key
+  # :search_hits and the value an array.
   def initialize(search_hits = [], &block)
     @search_hits = search_hits
+    if search_hits.is_a?(Hash)
+      @search_hits = search_hits[:search_hits]
+    end
     block.call(@search_hits) if block
   end

data/lib/ms/ident/pepxml/search_summary.rb CHANGED Viewed

@@ -30,7 +30,7 @@ class Ms::Ident::Pepxml::SearchSummary
   attr_accessor :out_data
   # by default, "1"
   attr_accessor :search_id
-  # a Modifications object
+  # an array of Ms::Ident::Pepxml::Modification objects
   attr_accessor :modifications
   # A SearchDatabase object (responds to :local_path and :type)
   attr_accessor :search_database
@@ -49,12 +49,14 @@ class Ms::Ident::Pepxml::SearchSummary
   def block_arg
     [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
       @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
-      @modifications = Ms::Ident::Pepxml::Modifications.new,
+      @modifications,
       @parameters = Ms::Ident::Pepxml::Parameters.new,
     ]
   end
+  # initializes modifications to an empty array
   def initialize(hash={}, &block)
+    @modifications = []
     @search_id = DEFAULT_SEARCH_ID
     merge!(hash, &block)
   end
@@ -68,7 +70,9 @@ class Ms::Ident::Pepxml::SearchSummary
     builder.search_summary(hash) do |xmlb|
       search_database.to_xml(xmlb)
       xmlb.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
-      modifications.to_xml(xmlb) if modifications
+      modifications.each do |mod|
+        mod.to_xml(xmlb)
+      end
       parameters.to_xml(xmlb) if parameters
     end
     builder || xmlb.doc.root.to_xml

data/lib/ms/ident/pepxml/spectrum_query.rb CHANGED Viewed

@@ -2,6 +2,8 @@ require 'nokogiri'
 require 'ms/mass'
 require 'merge'
+require 'ms/ident/pepxml/search_result'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end

data/lib/ms/ident/pepxml.rb CHANGED Viewed

@@ -43,19 +43,46 @@ class Ms::Ident::Pepxml
     doc
   end
-  # writes xml file named msms_pipeline_analysis.summary_xml into the msms_run_summary.base_name directory
-  def to_xml_file
-    to_xml(File.dirname(msms_pipeline_analysis.msms_run_summary.base_name) + '/' + msms_pipeline_analysis.summary_xml)
-  end
+  # if no options are given, an xml string is returned.  If either :outdir or
+  # :outfile is given, the xml is written to file and the output filename is returned.
+  # A single string argument will be interpreted as :outfile if it ends in
+  # '.xml' and the :outdir otherwise.  In this case, update_summary_xml is still true
+  #
+  # options:
+  #
+  #     arg                    default
+  #     :outdir             => nil   write to disk using this outdir with summary_xml basename
+  #     :outfile            => nil   write to this filename (overrides outdir)
+  #     :update_summary_xml => true  update summary_xml attribute to point to the output file true/false
+  #
+  # set outdir to
+  # File.dirname(pepxml_obj.msms_pipeline_analysis.msms_run_summary.base_name)
+  # to write to the same directory as the input search file.
+  def to_xml(opts={})
+    opts ||= {}
+    if opts.is_a?(String)
+      opts = ( opts.match(/\.xml$/) ?  {:outfile => opts} : {:outdir => opts } )
+    end
+    opt = {:update_summary_xml => true, :outdir => nil, :outfile => nil}.merge(opts)
+    if opt[:outfile]
+      outfile = opt[:outfile]
+    elsif opt[:outdir]
+      outfile = File.join(opt[:outdir], msms_pipeline_analysis.summary_xml.split(/[\/\\]/).last)
+    end
+    self.msms_pipeline_analysis.summary_xml = File.expand_path(outfile) if (opt[:update_summary_xml] && outfile)
-  # if no outfile is given, an xml string is returned.  summary_xml should
-  # have already been set and is not influenced by the outfile given here.
-  def to_xml(outfile=nil)
     builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
     msms_pipeline_analysis.to_xml(builder)
     add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
     string = builder.doc.to_xml
-    outfile ? File.open(outfile,'w') {|out| out.print(string) } : string
+    if outfile
+      File.open(outfile,'w') {|out| out.print(string) }
+      outfile
+    else
+      string
+    end
   end
 end

data/lib/ms/ident/protein.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module Ms ; end
+module Ms::Ident ; end
+module Ms::Ident::Protein
+  class << self
+  end
+  # gives the information up until the first space or carriage return.
+  # Assumes the protein can respond_to? :reference
+  def first_entry
+    reference.split(/[\s\r]/)[0]
+  end
+end

data/lib/ms/ident/search.rb ADDED Viewed

@@ -0,0 +1,105 @@
+module Ms
+  module Ident
+    module Search
+      attr_accessor :proteins
+      attr_accessor :peptides
+      # returns an array of peptide_hits and protein_hits that are linked to
+      # one another.  NOTE: this will update peptide and protein
+      # hits :proteins and :peptides attributes respectively).  Assumes that each search
+      # responds to :peptides, each peptide responds to :proteins and each protein to
+      # :peptides.  Can be done on a single file to restore protein/peptide
+      # linkages to their original single-file state.
+      # Assumes the protein is initialized with (reference, peptide_ar)
+      #
+      # yields the protein that will become the template for a new protein
+      # and expects a new protein hit
+      def merge!(ar_of_peptide_hit_arrays)
+        all_peptide_hits = []
+        reference_hash = {}
+        ar_of_peptide_hit_arrays.each do |peptide_hits|
+          all_peptide_hits.push(*peptide_hits)
+          peptide_hits.each do |peptide|
+            peptide.proteins.each do |protein|
+              ref = protein.reference
+              if reference_hash.key? ref
+                reference_hash[ref].peptides << peptide
+                reference_hash[ref]
+              else
+                reference_hash[ref] = yield(protein, [peptide])
+              end
+            end
+          end
+        end
+        [all_peptide_hits, reference_hash.values]
+      end
+    end
+    module SearchGroup
+      include Search
+      # an array of search objects
+      attr_accessor :searches
+      # the group's file extension (with no leading period)
+      def extension
+        'grp'
+      end
+      def search_class
+        Search
+      end
+      # a simple formatted file with paths to the search files
+      def to_paths(file)
+        IO.readlines(file).grep(/\w/).reject {|v| v =~ /^#/}.map {|v| v.chomp }
+      end
+      def from_file(file)
+        from_filenames(to_paths(file))
+      end
+      def from_filenames(filenames)
+        filenames.each do |file|
+          if !File.exist? file
+            message = "File: #{file} does not exist!\n"
+            message << "perhaps you need to modify the file with file paths"
+            abort message
+          end
+          @searches << search_class.new(file)
+        end
+      end
+      # takes an array of filenames or a single search filename (with
+      # extension defined by 'extendsion') or an array of objects passes any
+      # arguments to the initializer for each search
+      # the optional block yields the object for further processing
+      def initialize(arg=nil, opts={})
+        @peptides = []
+        @reference_hash = {}
+        @searches = []
+        if arg
+          if arg.is_a?(String) && arg =~ /\.#{Regexp.escap(extension)}$/
+            from_file(arg)
+          elsif arg.is_a?(Array) && arg.first.is_a?(String)
+            from_filenames(arg)
+          elsif arg.is_a?(Array)
+            @searches = array
+          else
+            raise ArgumentError, "must be file, array of filenames, or array of objs"
+          end
+          @searches << search_class.new(file, opts)
+        end
+        yield(self) if block_given?
+      end
+    end
+  end
+end

data/spec/ms/ident/pepxml/sample_enzyme_spec.rb CHANGED Viewed

@@ -48,6 +48,86 @@ describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
   end
 end
+describe 'an Ms::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
+  before do
+    @full_KRP = Ms::Ident::Pepxml::SampleEnzyme.new(
+      :name => 'trypsin',
+      :cut => 'KR',
+      :no_cut => 'P',
+      :sense => 'C',
+    )
+    @just_KR = Ms::Ident::Pepxml::SampleEnzyme.new(
+      :name => 'trypsin',
+      :cut => 'KR',
+      :no_cut => '',
+      :sense => 'C',
+    )
+  end
+  it 'calculates the number of tolerant termini' do
+    exp = [{
+      # full KR/P
+      %w(K EPTIDR E) => 2,
+      %w(K PEPTIDR E) => 1,
+      %w(F EEPTIDR E) => 1,
+      %w(F PEPTIDW R) => 0,
+    },
+    {
+      # just KR
+      %w(K EPTIDR E) => 2,
+      %w(K PEPTIDR E) => 2,
+      %w(F EEPTIDR E) => 1,
+      %w(F PEPTIDW R) => 0,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
+      hash.each do |seq, val|
+        sample_enzyme.num_tol_term(*seq).should == val
+      end
+    end
+  end
+  it 'calculates number of missed cleavages' do
+    exp = [{
+      "EPTIDR" => 0,
+      "PEPTIDR" => 0,
+      "EEPTIDR" => 0,
+      "PEPTIDW" => 0,
+      "PERPTIDW" => 0,
+      "PEPKPTIDW" => 0,
+      "PEPKTIDW" => 1,
+      "RTTIDR" => 1,
+      "RTTIKK" => 2,
+      "PKEPRTIDW" => 2,
+      "PKEPRTIDKP" => 2,
+      "PKEPRAALKPEERPTIDKW" => 3,
+    },
+    {
+      "EPTIDR" => 0,
+      "PEPTIDR" => 0,
+      "EEPTIDR" => 0,
+      "PEPTIDW" => 0,
+      "PERPTIDW" => 1,
+      "PEPKPTIDW" => 1,
+      "PEPKTIDW" => 1,
+      "RTTIDR" => 1,
+      "RTTIKK" => 2,
+      "PKEPRTIDW" => 2,
+      "PKEPRTIDKP" => 3,
+      "PKEPRAALKPEERPTIDKW" => 5,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
+      hash.each do |aaseq, val|
+        sample_enzyme.num_missed_cleavages(aaseq).should == val
+      end
+    end
+  end
+end
 xdescribe 'read in from an xml node' do
   # placeholder until written
 end
@@ -93,86 +173,6 @@ end
 describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
-  before(:each) do
-    @full_KRP = SampleEnzyme.new do |se|
-      se.name = 'trypsin'
-      se.cut = 'KR'
-      se.no_cut = 'P'
-      se.sense = 'C'
-    end
-    @just_KR = SampleEnzyme.new do |se|
-      se.name = 'trypsin'
-      se.cut = 'KR'
-      se.no_cut = ''
-      se.sense = 'C'
-    end
-  end
-  it 'calculates the number of tolerant termini' do
-    exp = [{
-      # full KR/P
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 1,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    },
-    {
-      # just KR
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 2,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    }
-    ]
-    scall = Sequest::PepXML::SearchHit
-    sample_enzyme_ar = [@full_KRP, @just_KR]
-    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
-      hash.each do |seq, val|
-        sample_enzyme.num_tol_term(seq).should == val
-      end
-    end
-  end
-  it 'calculates number of missed cleavages' do
-    exp = [{
-    "EPTIDR" => 0,
-    "PEPTIDR" => 0,
-    "EEPTIDR" => 0,
-    "PEPTIDW" => 0,
-    "PERPTIDW" => 0,
-    "PEPKPTIDW" => 0,
-    "PEPKTIDW" => 1,
-    "RTTIDR" => 1,
-    "RTTIKK" => 2,
-    "PKEPRTIDW" => 2,
-    "PKEPRTIDKP" => 2,
-    "PKEPRAALKPEERPTIDKW" => 3,
-    },
-    {
-    "EPTIDR" => 0,
-    "PEPTIDR" => 0,
-    "EEPTIDR" => 0,
-    "PEPTIDW" => 0,
-    "PERPTIDW" => 1,
-    "PEPKPTIDW" => 1,
-    "PEPKTIDW" => 1,
-    "RTTIDR" => 1,
-    "RTTIKK" => 2,
-    "PKEPRTIDW" => 2,
-    "PKEPRTIDKP" => 3,
-    "PKEPRAALKPEERPTIDKW" => 5,
-    }
-    ]
-    sample_enzyme_ar = [@full_KRP, @just_KR]
-    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
-      hash.each do |aaseq, val|
-        #first, middle, last = SpecID::Pep.split_sequence(seq)
-        # note that we are only using the middle section!
-        sample_enzyme.num_missed_cleavages(aaseq).should == val
-      end
-    end
-  end
 end
 =end

data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb ADDED Viewed

@@ -0,0 +1,37 @@
+require 'spec_helper'
+require 'ms/ident/pepxml/search_hit/modification_info'
+describe 'Ms::Ident::Pepxml::SearchHit::ModificationInfo' do
+  before do
+    modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
+      Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*ar)
+    end
+    hash = {
+      :mod_nterm_mass => 520.2,
+      :modified_peptide => "MOD*IFI^E&D",
+      :mod_aminoacid_masses => modaaobjs,
+    }
+    #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
+    @obj = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
+  end
+  it 'can produce valid pepxml xml' do
+    to_match = ['<modification_info',
+    ' mod_nterm_mass="520.2"',
+    " modified_peptide=\"MOD*IFI^E&amp;D\"",
+    "<mod_aminoacid_mass",
+    " position=\"3\"",
+    " mass=\"150.3\"",
+    " position=\"6\"",
+    " mass=\"345.2\"",
+    "</modification_info>"]
+    string = @obj.to_xml
+    to_match.each do |re|
+      string.matches Regexp.new(Regexp.escape(re))
+    end
+  end
+end

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 0
-  - 3
-  version: 0.0.3
+  - 17
+  version: 0.0.17
 platform: ruby
 authors:
 - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-28 00:00:00 -07:00
+date: 2011-03-08 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -135,9 +135,9 @@ files:
 - VERSION
 - lib/merge.rb
 - lib/ms/ident.rb
+- lib/ms/ident/peptide.rb
 - lib/ms/ident/pepxml.rb
 - lib/ms/ident/pepxml/modifications.rb
-- lib/ms/ident/pepxml/modifications/sequest.rb
 - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
 - lib/ms/ident/pepxml/msms_run_summary.rb
 - lib/ms/ident/pepxml/parameters.rb
@@ -150,9 +150,12 @@ files:
 - lib/ms/ident/pepxml/search_result.rb
 - lib/ms/ident/pepxml/search_summary.rb
 - lib/ms/ident/pepxml/spectrum_query.rb
+- lib/ms/ident/protein.rb
+- lib/ms/ident/search.rb
 - schema/pepXML_v115.xsd
 - schema/pepXML_v19.xsd
 - spec/ms/ident/pepxml/sample_enzyme_spec.rb
+- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
 - spec/ms/ident/pepxml_spec.rb
 - spec/spec_helper.rb
 has_rdoc: true
@@ -169,7 +172,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3918611084548908133
+      hash: -1969914373934932629
       segments:
       - 0
       version: "0"
@@ -190,5 +193,6 @@ specification_version: 3
 summary: mspire library for working with mzIdentML and pepxml
 test_files:
 - spec/ms/ident/pepxml/sample_enzyme_spec.rb
+- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
 - spec/ms/ident/pepxml_spec.rb
 - spec/spec_helper.rb

data/lib/ms/ident/pepxml/modifications/sequest.rb DELETED Viewed

@@ -1,237 +0,0 @@
-require 'ms/ident/pepxml/modifications'
-require 'ms/ident/pepxml/search_hit/modification_info'
-module Ms ; end
-module Ms::Ident ; end
-class Ms::Ident::Pepxml ; end
-module Ms::Ident::Pepxml::Modifications
-  # Handles modifications for sequest style searches
-  class Sequest
-    include Ms::Ident::Pepxml::Modifications
-    # a hash of all differential modifications present by aa_one_letter_symbol
-    # and special_symbol. This is NOT the mass difference but the total mass {
-    # 'M*' => 155.5, 'S@' => 190.3 }.  NOTE: Since the termini are dependent on
-    # the amino acid sequence, they are give the *differential* mass.  The
-    # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
-    # cterminus    ']' => 14.55 # nterminus
-    attr_accessor :masses_by_diff_mod_hash
-    # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
-    # values are the special_symbols
-    attr_accessor :mod_symbols_hash
-    # sequest params object
-    attr_accessor :params
-    # The modification symbols string looks like this:
-    # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
-    # ct is cterminal peptide (differential)
-    # nt is nterminal peptide (differential)
-    # the C is just cysteine
-    # will set_modifications and masses_by_diff_mod hash
-    def initialize(params=nil, modification_symbols_string='')
-      @params = params
-      if @params
-        set_modifications(params, modification_symbols_string)
-      end
-    end
-    # set the masses_by_diff_mod and mod_symbols_hash from
-    def set_hashes(modification_symbols_string)
-      @mod_symbols_hash = {}
-      @masses_by_diff_mod = {}
-      if (modification_symbols_string == nil || modification_symbols_string == '')
-        return nil
-      end
-      table = @params.mass_table
-      modification_symbols_string.split(/\)\s+\(/).each do |mod|
-        if md = mod.match(/\(?(\w+)(.) (.[\d\.]+)\)?/)
-          if md[1] == 'ct' || md[1] == 'nt'
-            mass_diff = md[3].to_f
-            @masses_by_diff_mod[md[2]] = mass_diff
-            @mod_symbols_hash[[md[1].to_sym, mass_diff]] = md[2].dup
-          else
-            symbol_string = md[2].dup
-            mass_diff = md[3].to_f
-            md[1].split('').each do |aa|
-              aa_as_sym = aa.to_sym
-              @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
-              @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
-            end
-          end
-        end
-      end
-    end
-    # given a bare peptide (no end pieces) returns a ModificationInfo object
-    # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
-    # if there are no modifications, returns nil
-    def modification_info(peptide)
-      if @masses_by_diff_mod.size == 0
-        return nil
-      end
-      hash = {}
-      hash[:modified_peptide] = peptide.dup
-      hsh = @masses_by_diff_mod
-      table = @params.mass_table
-      h = table[:h]  # this? or h_plus ??
-      oh = table[:o] + h
-      ## only the termini can match a single char
-      if hsh.key? peptide[0,1]
-        # AA + H + differential_mod
-        hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
-        peptide = peptide[1...(peptide.size)]
-      end
-      if hsh.key? peptide[(peptide.size-1),1]
-        # AA + OH + differential_mod
-        hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
-        peptide.slice!( 0..-2 )
-        peptide = peptide[0...(peptide.size-1)]
-      end
-      mod_array = []
-      (0...peptide.size).each do |i|
-        if hsh.key? peptide[i,2]
-          mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
-        end
-      end
-      if mod_array.size > 0
-        hash[:mod_aminoacid_masses] = mod_array
-      end
-      if hash.size > 1  # if there is more than just the modified peptide there
-        Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
-        #Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
-      else
-        nil
-      end
-    end
-    # returns an array of static mod objects and static terminal mod objects
-    def create_static_mods(params)
-      ####################################
-      ## static mods
-      ####################################
-      static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
-      static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
-      params.mods.each do |k,v|
-        v_to_f = v.to_f
-        if v_to_f != 0.0
-          if k =~ /add_(\w)_/
-            static_mods << [$1.to_sym, v_to_f]
-          else
-            static_terminal_mods << [k, v_to_f]
-          end
-        end
-      end
-      aa_hash = params.mass_table
-      ## Create the static_mods objects
-      static_mods.map! do |mod|
-        hash = {
-          :aminoacid => mod[0].to_s,
-          :massdiff => mod[1],
-          :mass => aa_hash[mod[0]] + mod[1],
-          :variable => 'N',
-          :binary => 'Y',
-        }
-        Ms::Ident::Pepxml::AminoacidModification.new(hash)
-      end
-      ## Create the static_terminal_mods objects
-      static_terminal_mods.map! do |mod|
-        terminus = if mod[0] =~ /Cterm/ ; 'c'
-                   else                 ; 'n' # only two possible termini
-                   end
-        protein_terminus = case mod[0]
-                           when /Nterm_protein/ ; 'n'
-                           when /Cterm_protein/ ; 'c'
-                           else nil
-                           end
-        # create the hash
-        hash = {
-          :terminus => terminus,
-          :massdiff => mod[1],
-          :variable => 'N',
-          :description => mod[0],
-        }
-        hash[:protein_terminus] = protein_terminus if protein_terminus
-        Ms::Ident::Pepxml::TerminalModification.new(hash)
-      end
-      [static_mods, static_terminal_mods]
-    end
-    # 1. sets aminoacid_modifications and terminal_modifications from a sequest params object
-    # 2. sets @params
-    # 3. sets @masses_by_diff_mod
-    def set_modifications(params, modification_symbols_string)
-      @params = params
-      set_hashes(modification_symbols_string)
-      (static_mods, static_terminal_mods) = create_static_mods(params)
-      aa_hash = params.mass_table
-      #################################
-      # Variable Mods:
-      #################################
-      arr = params.diff_search_options.rstrip.split(/\s+/)
-      # [aa.to_sym, diff.to_f]
-      variable_mods = []
-      (0...arr.size).step(2) do |i|
-        if arr[i].to_f != 0.0
-          variable_mods << [arr[i+1], arr[i].to_f]
-        end
-      end
-      mod_objects = []
-      variable_mods.each do |mod|
-        mod[0].split('').each do |aa|
-          hash = {
-            :aminoacid => aa,
-            :massdiff => mod[1],
-            :mass => aa_hash[aa.to_sym] + mod[1],
-            :variable => 'Y',
-            :binary => 'N',
-            :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
-          }
-          mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
-        end
-      end
-      variable_mods = mod_objects
-      #################################
-      # TERMINAL Variable Mods:
-      #################################
-      # These are always peptide, not protein termini (for sequest)
-      (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
-      to_add = []
-      if nterm_diff != 0.0
-        to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
-      end
-      if cterm_diff != 0.0
-        to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
-      end
-      variable_terminal_mods = to_add.map do |term, mssdiff, symb|
-        hash = {
-          :terminus => term,
-          :massdiff => mssdiff,
-          :variable => 'Y',
-          :symbol => symb,
-        }
-        Ms::Ident::Pepxml::TerminalModification.new(hash)
-      end
-      #########################
-      # COLLECT THEM
-      #########################
-      @aminoacid_modifications = static_mods + variable_mods
-      @terminal_modifications = static_terminal_mods + variable_terminal_mods
-    end
-  end