RubyGems - ms-ident - Versions diffs - 0.0.3 → 0.0.17 - Mend

ms-ident 0.0.3 → 0.0.17

Files changed (17) hide show

data/VERSION +1 -1
data/lib/ms/ident/peptide.rb +75 -0
data/lib/ms/ident/pepxml/modifications.rb +0 -11
data/lib/ms/ident/pepxml/msms_run_summary.rb +1 -0
data/lib/ms/ident/pepxml/sample_enzyme.rb +16 -17
data/lib/ms/ident/pepxml/search_database.rb +11 -4
data/lib/ms/ident/pepxml/search_hit/modification_info.rb +7 -10
data/lib/ms/ident/pepxml/search_result.rb +8 -1
data/lib/ms/ident/pepxml/search_summary.rb +7 -3
data/lib/ms/ident/pepxml/spectrum_query.rb +2 -0
data/lib/ms/ident/pepxml.rb +35 -8
data/lib/ms/ident/protein.rb +17 -0
data/lib/ms/ident/search.rb +105 -0
data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +80 -80
data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
metadata +9 -5
data/lib/ms/ident/pepxml/modifications/sequest.rb +0 -237

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.3
1	+ 0.0.17

data/lib/ms/ident/peptide.rb ADDED Viewed

@@ -0,0 +1,75 @@
module Ms ; end
module Ms::Ident ; end

# Helpers for parsing peptide notation.
#
# A 'sequence' is a notation of a peptide that includes the leading and
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
# and may contain post-translational modification information.
#
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
# trailing notation (e.g., PEPTIDER or LAKKLY)
module Ms::Ident::Peptide
  # Matches any character that is not an uppercase letter, '.' or '-'.
  Nonstandard_AA_re = /[^A-Z\.\-]/

  class << self
    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
    # amino acids, and returns the center piece (the aaseq).
    #
    #   K.PEPT*IDE.R  # -> PEPTIDE
    #   PEPTIDE.A     # -> PEPTIDE   (first piece longer than one character)
    #   A.PEPTIDE     # -> PEPTIDE
    #   PEPTIDE       # -> PEPTIDE   (no flanks given; returned as is)
    #
    # Any other number of dot-separated pieces (zero, or more than three)
    # calls Kernel#abort with a "bad peptide sequence" message, i.e. it
    # raises SystemExit rather than a StandardError.
    def sequence_to_aaseq(sequence)
      pieces = remove_non_amino_acids(sequence).split('.')
      case pieces.size
      when 3 then pieces[1]
      when 2
        # a flanking residue is one character, so a longer first piece must
        # be the peptide itself
        pieces[0].size > 1 ? pieces[0] : pieces[1]
      when 1  ## this must be a parse error!
        pieces[0] ## which is the peptide itself
      else
        abort "bad peptide sequence: #{sequence.inspect}"
      end
    end

    # Returns a copy of sequence with every character matched by
    # Nonstandard_AA_re removed.
    def remove_non_amino_acids(sequence)
      sequence.gsub(Nonstandard_AA_re, '')
    end

    # Removes non amino acids and splits the sequence (see split_sequence).
    def prepare_sequence(sequence)
      split_sequence(remove_non_amino_acids(sequence))
    end

    # Returns [prev, peptide, next] from sequence.  Parse errors (no dot, an
    # empty string, or more than three dot-separated pieces) return
    # [nil, nil, nil]
    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
    #   PEPTIDE.A    # -> -, PEPTIDE, A
    #   A.PEPTIDE    # -> A, PEPTIDE, -
    #   PEPTIDE      # -> nil,nil,nil
    def split_sequence(sequence)
      pieces = sequence.split('.')
      case pieces.size
      when 3
        pieces
      when 2
        if pieces[0].size > 1  ## N termini
          ['-', pieces[0], pieces[1]]
        else  ## C termini
          [pieces[0], pieces[1], '-']
        end
      else
        # always hand back a three element array so callers can destructure
        [nil, nil, nil]
      end
    end
  end
end

data/lib/ms/ident/pepxml/modifications.rb CHANGED Viewed

@@ -5,17 +5,6 @@ module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
-# holds a list of AminoacidModification and TerminalModification objects.
-class Ms::Ident::Pepxml::Modifications < Array
-  ## Generates the pepxml for static and differential amino acid mods based on
-  ## sequest object
-  def to_xml(builder=nil)
-    xmlb = builder || Nokogiri::XML::Builder.new
-    self.each {|mod| mod.to_xml(xmlb) }
-    builder || xmlb.doc.root.to_xml
-  end
-end
 # Modified aminoacid, static or variable
 # unless otherwise stated, all attributes can be anything
 class Ms::Ident::Pepxml::AminoacidModification

data/lib/ms/ident/pepxml/msms_run_summary.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require 'nokogiri'
 require 'ms/ident/pepxml/sample_enzyme'
 require 'ms/ident/pepxml/search_summary'
+require 'ms/ident/pepxml/spectrum_query'
 module Ms ; end
 module Ms::Ident ; end

data/lib/ms/ident/pepxml/sample_enzyme.rb CHANGED Viewed

@@ -1,4 +1,6 @@
 require 'merge'
+require 'strscan'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
@@ -56,21 +58,11 @@ class Ms::Ident::Pepxml::SampleEnzyme
   def self.from_pepxml_node(node)
     self.new.from_pepxml_node(node)
   end
-end
-###################################################
-###################################################
-###################################################
-###################################################
-# This is digestion methodology:
-=begin
-require 'strscan'
-  # takes an amino acid sequence (e.g., -.PEPTIDK.L)
+  # takes an amino acid sequence (e.g. PEPTIDE).
   # returns the number of missed cleavages
   def num_missed_cleavages(aaseq)
+    seq_to_scan = '  ' + aaseq + '  '
     raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
     @num_missed_cleavages_regex =
       if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
@@ -89,23 +81,30 @@ require 'strscan'
     num
   end
-  # requires full sequence (with heads and tails)
-  def num_tol_term(sequence)
+  # No arguments should contain non-standard amino acids
+  def num_tol_term(prev_aa, middle, next_aa)
     raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
     no_cut = @no_cut || ''
     num_tol = 0
-    first, middle, last = SpecID::Pep.split_sequence(sequence)
     last_of_middle = middle[-1,1]
     first_of_middle = middle[0,1]
-    if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
+    if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
       num_tol += 1
     end
-    if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
+    if @cut.include?(last_of_middle) && !no_cut.include?(next_aa) || next_aa == '-'
       num_tol += 1
     end
     num_tol
   end
+end
+###################################################
+###################################################
+###################################################
+###################################################
+# This is digestion methodology:
+=begin
   # returns all peptides of missed cleavages <= 'missed_cleavages'
   # so 2 missed cleavages will return all no missed cleavage peptides
   # all 1 missed cleavages and all 2 missed cleavages.

data/lib/ms/ident/pepxml/search_database.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'ms/fasta'
 require 'merge'
 module Ms ; end
 module Ms::Ident ; end
@@ -25,13 +26,19 @@ class Ms::Ident::Pepxml
     def initialize(hash={}, get_size_of_residues=false)
       merge!(hash)
       if get_size_of_residues && File.exist?(@local_path)
-        @size_of_residues = 0
-        Ms::Fasta.foreach(@local_path) do |entry|
-          @size_of_residues += entry.sequence.size
-        end
+        set_size_of_residues!
       end
     end
+    # returns self for chaining
+    def set_size_of_residues!
+      @size_of_residues = 0
+      Ms::Fasta.foreach(@local_path) do |entry|
+        @size_of_residues += entry.sequence.size
+      end
+      self
+    end
     def to_xml(builder)
       attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
       builder.search_database(Hash[attrs])

data/lib/ms/ident/pepxml/search_hit/modification_info.rb CHANGED Viewed

@@ -31,16 +31,11 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
   # position ranges from 1 to peptide length
   #attr_accessor :mod_aminoacid_masses
-  class << self
-    alias_method :old_new, :new
-    # takes either a hash or the normal list of values to set.
-    def new(*args)
-      if args.first.is_a?(Hash)
-        args = args.first.values_at(*members)
-      end
-      obj = old_new(*args)
-      obj
+  def initialize(*args)
+    if args.first.is_a?(Hash)
+      args = args.first.values_at(*members)
     end
+    super(*args)
   end
   # Will escape any xml special chars in modified_peptide
@@ -50,11 +45,12 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
     ## Create the attribute string:
     atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
     atts.map! {|at| (v=send(at)) && [at, v] }.compact
-    xmlb.modification_info(Hash[atts]) do
+    xmlb.modification_info(Hash[atts]) do |xmlb|
       mod_aminoacid_masses.andand.each do |mod_aa_mass|
         mod_aa_mass.to_xml(xmlb)
       end
     end
+    builder || xmlb.doc.root.to_s
   end
   def self.from_pepxml_node(node)
@@ -78,5 +74,6 @@ end
 Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
   def to_xml(builder)
     builder.mod_aminoacid_mass(:position => position, :mass => mass)
+    builder
   end
 end

data/lib/ms/ident/pepxml/search_result.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 require 'nokogiri'
+require 'ms/ident/pepxml/search_hit'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end
@@ -8,9 +10,14 @@ class Ms::Ident::Pepxml::SearchResult
   # an array of search_hits
   attr_accessor :search_hits
-  # if block given, then yields an empty search_hits array
+  # if block given, then yields an empty search_hits array.
+  # For consistency with other objects, will also take a hash that has the key
+  # :search_hits and the value an array.
   def initialize(search_hits = [], &block)
     @search_hits = search_hits
+    if search_hits.is_a?(Hash)
+      @search_hits = search_hits[:search_hits]
+    end
     block.call(@search_hits) if block
   end

data/lib/ms/ident/pepxml/search_summary.rb CHANGED Viewed

@@ -30,7 +30,7 @@ class Ms::Ident::Pepxml::SearchSummary
   attr_accessor :out_data
   # by default, "1"
   attr_accessor :search_id
-  # a Modifications object
+  # an array of Ms::Ident::Pepxml::Modification objects
   attr_accessor :modifications
   # A SearchDatabase object (responds to :local_path and :type)
   attr_accessor :search_database
@@ -49,12 +49,14 @@ class Ms::Ident::Pepxml::SearchSummary
   def block_arg
     [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
       @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
-      @modifications = Ms::Ident::Pepxml::Modifications.new,
+      @modifications,
       @parameters = Ms::Ident::Pepxml::Parameters.new,
     ]
   end
+  # initializes modifications to an empty array
   def initialize(hash={}, &block)
+    @modifications = []
     @search_id = DEFAULT_SEARCH_ID
     merge!(hash, &block)
   end
@@ -68,7 +70,9 @@ class Ms::Ident::Pepxml::SearchSummary
     builder.search_summary(hash) do |xmlb|
       search_database.to_xml(xmlb)
       xmlb.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
-      modifications.to_xml(xmlb) if modifications
+      modifications.each do |mod|
+        mod.to_xml(xmlb)
+      end
       parameters.to_xml(xmlb) if parameters
     end
     builder || xmlb.doc.root.to_xml

data/lib/ms/ident/pepxml/spectrum_query.rb CHANGED Viewed

@@ -2,6 +2,8 @@ require 'nokogiri'
 require 'ms/mass'
 require 'merge'
+require 'ms/ident/pepxml/search_result'
 module Ms ; end
 module Ms::Ident ; end
 class Ms::Ident::Pepxml ; end

data/lib/ms/ident/pepxml.rb CHANGED Viewed

@@ -43,19 +43,46 @@ class Ms::Ident::Pepxml
     doc
   end
-  # writes xml file named msms_pipeline_analysis.summary_xml into the msms_run_summary.base_name directory
-  def to_xml_file
-    to_xml(File.dirname(msms_pipeline_analysis.msms_run_summary.base_name) + '/' + msms_pipeline_analysis.summary_xml)
-  end
+  # if no options are given, an xml string is returned.  If either :outdir or
+  # :outfile is given, the xml is written to file and the output filename is returned.
+  # A single string argument will be interpreted as :outfile if it ends in
+  # '.xml' and the :outdir otherwise.  In this case, update_summary_xml is still true
+  #
+  # options:
+  #
+  #     arg                    default
+  #     :outdir             => nil   write to disk using this outdir with summary_xml basename
+  #     :outfile            => nil   write to this filename (overrides outdir)
+  #     :update_summary_xml => true  update summary_xml attribute to point to the output file true/false
+  #
+  # set outdir to
+  # File.dirname(pepxml_obj.msms_pipeline_analysis.msms_run_summary.base_name)
+  # to write to the same directory as the input search file.
+  def to_xml(opts={})
+    opts ||= {}
+    if opts.is_a?(String)
+      opts = ( opts.match(/\.xml$/) ?  {:outfile => opts} : {:outdir => opts } )
+    end
+    opt = {:update_summary_xml => true, :outdir => nil, :outfile => nil}.merge(opts)
+    if opt[:outfile]
+      outfile = opt[:outfile]
+    elsif opt[:outdir]
+      outfile = File.join(opt[:outdir], msms_pipeline_analysis.summary_xml.split(/[\/\\]/).last)
+    end
+    self.msms_pipeline_analysis.summary_xml = File.expand_path(outfile) if (opt[:update_summary_xml] && outfile)
-  # if no outfile is given, an xml string is returned.  summary_xml should
-  # have already been set and is not influenced by the outfile given here.
-  def to_xml(outfile=nil)
     builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
     msms_pipeline_analysis.to_xml(builder)
     add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
     string = builder.doc.to_xml
-    outfile ? File.open(outfile,'w') {|out| out.print(string) } : string
+    if outfile
+      File.open(outfile,'w') {|out| out.print(string) }
+      outfile
+    else
+      string
+    end
   end
 end

data/lib/ms/ident/protein.rb ADDED Viewed

@@ -0,0 +1,17 @@
module Ms ; end
module Ms::Ident ; end

# Mixin for protein-like objects.  The including object must respond to
# :reference and return a String.
module Ms::Ident::Protein
  # Gives the information in the reference up until the first whitespace
  # character (space, tab, carriage return, newline, ...).
  #
  # Returns nil when the reference is empty or only whitespace, and an empty
  # string when the reference starts with whitespace followed by other text.
  def first_entry
    reference.split(/\s/).first
  end
end

data/lib/ms/ident/search.rb ADDED Viewed

@@ -0,0 +1,105 @@
module Ms
  module Ident
    # Mixin for an object that holds the proteins and peptides of a search.
    module Search
      attr_accessor :proteins
      attr_accessor :peptides

      # Merges several arrays of peptide hits and rebuilds one protein per
      # distinct protein reference.
      #
      # Assumes each peptide responds to :proteins, and each protein responds
      # to :reference and :peptides.
      #
      # For the first peptide seen with a given protein reference the block
      # is called with (protein, [peptide]); the protein is the template and
      # the block must return the new protein hit.  Every later peptide that
      # carries a protein with the same reference is appended to that new
      # protein's :peptides.  A block is required whenever any peptide has a
      # protein.
      #
      # Returns [all_peptide_hits, new_proteins], where all_peptide_hits is
      # the concatenation of the given arrays and new_proteins are in first
      # seen order.
      #
      # NOTE(review): earlier documentation said the peptide hits' :proteins
      # are relinked as well; this method only builds the new proteins'
      # peptide lists -- confirm whether callers expect the relinking.
      def merge!(ar_of_peptide_hit_arrays)
        all_peptide_hits = []
        reference_hash = {}
        ar_of_peptide_hit_arrays.each do |peptide_hits|
          all_peptide_hits.push(*peptide_hits)
          peptide_hits.each do |peptide|
            peptide.proteins.each do |protein|
              ref = protein.reference
              if reference_hash.key?(ref)
                reference_hash[ref].peptides << peptide
              else
                reference_hash[ref] = yield(protein, [peptide])
              end
            end
          end
        end
        [all_peptide_hits, reference_hash.values]
      end
    end

    # Mixin for an object that groups several searches.
    module SearchGroup
      include Search

      # an array of search objects
      attr_accessor :searches

      # the group's file extension (with no leading period)
      def extension
        'grp'
      end

      # The class instantiated for each search file.
      # NOTE(review): the default is the Search module, which cannot be
      # instantiated; including classes presumably override this -- confirm.
      def search_class
        Search
      end

      # Reads a simple formatted file with paths to the search files: one
      # path per line, skipping lines without any word character and lines
      # starting with '#'.  Returns the paths with line endings removed.
      def to_paths(file)
        # File.readlines never treats a leading '|' as a command to run
        File.readlines(file).grep(/\w/).reject { |line| line.start_with?('#') }.map(&:chomp)
      end

      # Adds a search for every path listed in the group file.  opts (when
      # not empty) is passed on to each search initializer.
      def from_file(file, opts = {})
        from_filenames(to_paths(file), opts)
      end

      # Adds one search_class instance per filename to @searches.  opts is
      # passed as a second argument to the search initializer only when it
      # is not empty.  Aborts (SystemExit) on the first filename that does
      # not exist.
      def from_filenames(filenames, opts = {})
        extra_args = opts.empty? ? [] : [opts]
        filenames.each do |file|
          unless File.exist?(file)
            abort "File: #{file} does not exist!\n" \
                  "perhaps you need to modify the file with file paths"
          end
          @searches << search_class.new(file, *extra_args)
        end
      end

      # Takes a single group filename (with the extension defined by
      # 'extension'), an array of search filenames, or an array of search
      # objects.  opts is passed to the initializer of each search created
      # from a filename.  The optional block yields the object for further
      # processing.
      #
      # Raises ArgumentError for any other kind of argument (including a
      # String that does not end in the group extension).
      def initialize(arg = nil, opts = {})
        @peptides = []
        @reference_hash = {}
        @searches = []
        if arg
          if arg.is_a?(String) && arg =~ /\.#{Regexp.escape(extension)}\z/
            from_file(arg, opts)
          elsif arg.is_a?(Array) && arg.first.is_a?(String)
            from_filenames(arg, opts)
          elsif arg.is_a?(Array)
            # already search objects (or an empty array): use them as given
            @searches = arg
          else
            raise ArgumentError, "must be file, array of filenames, or array of objs"
          end
        end
        yield(self) if block_given?
      end
    end
  end
end

data/spec/ms/ident/pepxml/sample_enzyme_spec.rb CHANGED Viewed

@@ -48,6 +48,86 @@ describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
   end
 end
+describe 'an Ms::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
+  before do
+    @full_KRP = Ms::Ident::Pepxml::SampleEnzyme.new(
+      :name => 'trypsin',
+      :cut => 'KR',
+      :no_cut => 'P',
+      :sense => 'C',
+    )
+    @just_KR = Ms::Ident::Pepxml::SampleEnzyme.new(
+      :name => 'trypsin',
+      :cut => 'KR',
+      :no_cut => '',
+      :sense => 'C',
+    )
+  end
+  it 'calculates the number of tolerant termini' do
+    exp = [{
+      # full KR/P
+      %w(K EPTIDR E) => 2,
+      %w(K PEPTIDR E) => 1,
+      %w(F EEPTIDR E) => 1,
+      %w(F PEPTIDW R) => 0,
+    },
+    {
+      # just KR
+      %w(K EPTIDR E) => 2,
+      %w(K PEPTIDR E) => 2,
+      %w(F EEPTIDR E) => 1,
+      %w(F PEPTIDW R) => 0,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
+      hash.each do |seq, val|
+        sample_enzyme.num_tol_term(*seq).should == val
+      end
+    end
+  end
+  it 'calculates number of missed cleavages' do
+    exp = [{
+      "EPTIDR" => 0,
+      "PEPTIDR" => 0,
+      "EEPTIDR" => 0,
+      "PEPTIDW" => 0,
+      "PERPTIDW" => 0,
+      "PEPKPTIDW" => 0,
+      "PEPKTIDW" => 1,
+      "RTTIDR" => 1,
+      "RTTIKK" => 2,
+      "PKEPRTIDW" => 2,
+      "PKEPRTIDKP" => 2,
+      "PKEPRAALKPEERPTIDKW" => 3,
+    },
+    {
+      "EPTIDR" => 0,
+      "PEPTIDR" => 0,
+      "EEPTIDR" => 0,
+      "PEPTIDW" => 0,
+      "PERPTIDW" => 1,
+      "PEPKPTIDW" => 1,
+      "PEPKTIDW" => 1,
+      "RTTIDR" => 1,
+      "RTTIKK" => 2,
+      "PKEPRTIDW" => 2,
+      "PKEPRTIDKP" => 3,
+      "PKEPRAALKPEERPTIDKW" => 5,
+    }
+    ]
+    sample_enzyme_ar = [@full_KRP, @just_KR]
+    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
+      hash.each do |aaseq, val|
+        sample_enzyme.num_missed_cleavages(aaseq).should == val
+      end
+    end
+  end
+end
 xdescribe 'read in from an xml node' do
   # placeholder until written
 end
@@ -93,86 +173,6 @@ end
 describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
-  before(:each) do
-    @full_KRP = SampleEnzyme.new do |se|
-      se.name = 'trypsin'
-      se.cut = 'KR'
-      se.no_cut = 'P'
-      se.sense = 'C'
-    end
-    @just_KR = SampleEnzyme.new do |se|
-      se.name = 'trypsin'
-      se.cut = 'KR'
-      se.no_cut = ''
-      se.sense = 'C'
-    end
-  end
-  it 'calculates the number of tolerant termini' do
-    exp = [{
-      # full KR/P
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 1,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    },
-    {
-      # just KR
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 2,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    }
-    ]
-    scall = Sequest::PepXML::SearchHit
-    sample_enzyme_ar = [@full_KRP, @just_KR]
-    sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
-      hash.each do |seq, val|
-        sample_enzyme.num_tol_term(seq).should == val
-      end
-    end
-  end
-  it 'calculates number of missed cleavages' do
-    exp = [{
-    "EPTIDR" => 0,
-    "PEPTIDR" => 0,
-    "EEPTIDR" => 0,
-    "PEPTIDW" => 0,
-    "PERPTIDW" => 0,
-    "PEPKPTIDW" => 0,
-    "PEPKTIDW" => 1,
-    "RTTIDR" => 1,
-    "RTTIKK" => 2,
-    "PKEPRTIDW" => 2,
-    "PKEPRTIDKP" => 2,
-    "PKEPRAALKPEERPTIDKW" => 3,
-    },
-    {
-    "EPTIDR" => 0,
-    "PEPTIDR" => 0,
-    "EEPTIDR" => 0,
-    "PEPTIDW" => 0,
-    "PERPTIDW" => 1,
-    "PEPKPTIDW" => 1,
-    "PEPKTIDW" => 1,
-    "RTTIDR" => 1,
-    "RTTIKK" => 2,
-    "PKEPRTIDW" => 2,
-    "PKEPRTIDKP" => 3,
-    "PKEPRAALKPEERPTIDKW" => 5,
-    }
-    ]
-    sample_enzyme_ar = [@full_KRP, @just_KR]
-    sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
-      hash.each do |aaseq, val|
-        #first, middle, last = SpecID::Pep.split_sequence(seq)
-        # note that we are only using the middle section!
-        sample_enzyme.num_missed_cleavages(aaseq).should == val
-      end
-    end
-  end
 end
 =end

data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb ADDED Viewed

@@ -0,0 +1,37 @@
require 'spec_helper'
require 'ms/ident/pepxml/search_hit/modification_info'

# Checks the xml produced by a ModificationInfo built from a hash.
describe 'Ms::Ident::Pepxml::SearchHit::ModificationInfo' do
  before do
    # two modified residues given as [position, mass]
    aa_masses = [[3, 150.3], [6, 345.2]].map do |position_and_mass|
      Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*position_and_mass)
    end
    attributes = {
      :mod_nterm_mass => 520.2,
      :modified_peptide => "MOD*IFI^E&D",
      :mod_aminoacid_masses => aa_masses,
    }
    @obj = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(attributes)
  end

  it 'can produce valid pepxml xml' do
    # fragments are matched one by one (not as one string); the '&' in the
    # modified peptide is expected in its escaped form
    expected_fragments = [
      '<modification_info',
      ' mod_nterm_mass="520.2"',
      " modified_peptide=\"MOD*IFI^E&amp;D\"",
      "<mod_aminoacid_mass",
      " position=\"3\"",
      " mass=\"150.3\"",
      " position=\"6\"",
      " mass=\"345.2\"",
      "</modification_info>",
    ]
    xml = @obj.to_xml
    expected_fragments.each do |fragment|
      xml.matches Regexp.new(Regexp.escape(fragment))
    end
  end
end

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 0
-  - 3
-  version: 0.0.3
+  - 17
+  version: 0.0.17
 platform: ruby
 authors:
 - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-28 00:00:00 -07:00
+date: 2011-03-08 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -135,9 +135,9 @@ files:
 - VERSION
 - lib/merge.rb
 - lib/ms/ident.rb
+- lib/ms/ident/peptide.rb
 - lib/ms/ident/pepxml.rb
 - lib/ms/ident/pepxml/modifications.rb
-- lib/ms/ident/pepxml/modifications/sequest.rb
 - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
 - lib/ms/ident/pepxml/msms_run_summary.rb
 - lib/ms/ident/pepxml/parameters.rb
@@ -150,9 +150,12 @@ files:
 - lib/ms/ident/pepxml/search_result.rb
 - lib/ms/ident/pepxml/search_summary.rb
 - lib/ms/ident/pepxml/spectrum_query.rb
+- lib/ms/ident/protein.rb
+- lib/ms/ident/search.rb
 - schema/pepXML_v115.xsd
 - schema/pepXML_v19.xsd
 - spec/ms/ident/pepxml/sample_enzyme_spec.rb
+- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
 - spec/ms/ident/pepxml_spec.rb
 - spec/spec_helper.rb
 has_rdoc: true
@@ -169,7 +172,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3918611084548908133
+      hash: -1969914373934932629
       segments:
       - 0
       version: "0"
@@ -190,5 +193,6 @@ specification_version: 3
 summary: mspire library for working with mzIdentML and pepxml
 test_files:
 - spec/ms/ident/pepxml/sample_enzyme_spec.rb
+- spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
 - spec/ms/ident/pepxml_spec.rb
 - spec/spec_helper.rb

data/lib/ms/ident/pepxml/modifications/sequest.rb DELETED Viewed

@@ -1,237 +0,0 @@
-require 'ms/ident/pepxml/modifications'
-require 'ms/ident/pepxml/search_hit/modification_info'
-module Ms ; end
-module Ms::Ident ; end
-class Ms::Ident::Pepxml ; end
-module Ms::Ident::Pepxml::Modifications
-  # Handles modifications for sequest style searches
-  class Sequest
-    include Ms::Ident::Pepxml::Modifications
-    # a hash of all differential modifications present by aa_one_letter_symbol
-    # and special_symbol. This is NOT the mass difference but the total mass {
-    # 'M*' => 155.5, 'S@' => 190.3 }.  NOTE: Since the termini are dependent on
-    # the amino acid sequence, they are give the *differential* mass.  The
-    # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
-    # cterminus    ']' => 14.55 # nterminus
-    attr_accessor :masses_by_diff_mod_hash
-    # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
-    # values are the special_symbols
-    attr_accessor :mod_symbols_hash
-    # sequest params object
-    attr_accessor :params
-    # The modification symbols string looks like this:
-    # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
-    # ct is cterminal peptide (differential)
-    # nt is nterminal peptide (differential)
-    # the C is just cysteine
-    # will set_modifications and masses_by_diff_mod hash
-    def initialize(params=nil, modification_symbols_string='')
-      @params = params
-      if @params
-        set_modifications(params, modification_symbols_string)
-      end
-    end
-    # set the masses_by_diff_mod and mod_symbols_hash from
-    def set_hashes(modification_symbols_string)
-      @mod_symbols_hash = {}
-      @masses_by_diff_mod = {}
-      if (modification_symbols_string == nil || modification_symbols_string == '')
-        return nil
-      end
-      table = @params.mass_table
-      modification_symbols_string.split(/\)\s+\(/).each do |mod|
-        if md = mod.match(/\(?(\w+)(.) (.[\d\.]+)\)?/)
-          if md[1] == 'ct' || md[1] == 'nt'
-            mass_diff = md[3].to_f
-            @masses_by_diff_mod[md[2]] = mass_diff
-            @mod_symbols_hash[[md[1].to_sym, mass_diff]] = md[2].dup
-          else
-            symbol_string = md[2].dup
-            mass_diff = md[3].to_f
-            md[1].split('').each do |aa|
-              aa_as_sym = aa.to_sym
-              @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
-              @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
-            end
-          end
-        end
-      end
-    end
-    # given a bare peptide (no end pieces) returns a ModificationInfo object
-    # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
-    # if there are no modifications, returns nil
-    def modification_info(peptide)
-      if @masses_by_diff_mod.size == 0
-        return nil
-      end
-      hash = {}
-      hash[:modified_peptide] = peptide.dup
-      hsh = @masses_by_diff_mod
-      table = @params.mass_table
-      h = table[:h]  # this? or h_plus ??
-      oh = table[:o] + h
-      ## only the termini can match a single char
-      if hsh.key? peptide[0,1]
-        # AA + H + differential_mod
-        hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
-        peptide = peptide[1...(peptide.size)]
-      end
-      if hsh.key? peptide[(peptide.size-1),1]
-        # AA + OH + differential_mod
-        hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
-        peptide.slice!( 0..-2 )
-        peptide = peptide[0...(peptide.size-1)]
-      end
-      mod_array = []
-      (0...peptide.size).each do |i|
-        if hsh.key? peptide[i,2]
-          mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
-        end
-      end
-      if mod_array.size > 0
-        hash[:mod_aminoacid_masses] = mod_array
-      end
-      if hash.size > 1  # if there is more than just the modified peptide there
-        Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
-        #Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
-      else
-        nil
-      end
-    end
-    # returns an array of static mod objects and static terminal mod objects
-    def create_static_mods(params)
-      ####################################
-      ## static mods
-      ####################################
-      static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
-      static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
-      params.mods.each do |k,v|
-        v_to_f = v.to_f
-        if v_to_f != 0.0
-          if k =~ /add_(\w)_/
-            static_mods << [$1.to_sym, v_to_f]
-          else
-            static_terminal_mods << [k, v_to_f]
-          end
-        end
-      end
-      aa_hash = params.mass_table
-      ## Create the static_mods objects
-      static_mods.map! do |mod|
-        hash = {
-          :aminoacid => mod[0].to_s,
-          :massdiff => mod[1],
-          :mass => aa_hash[mod[0]] + mod[1],
-          :variable => 'N',
-          :binary => 'Y',
-        }
-        Ms::Ident::Pepxml::AminoacidModification.new(hash)
-      end
-      ## Create the static_terminal_mods objects
-      static_terminal_mods.map! do |mod|
-        terminus = if mod[0] =~ /Cterm/ ; 'c'
-                   else                 ; 'n' # only two possible termini
-                   end
-        protein_terminus = case mod[0]
-                           when /Nterm_protein/ ; 'n'
-                           when /Cterm_protein/ ; 'c'
-                           else nil
-                           end
-        # create the hash
-        hash = {
-          :terminus => terminus,
-          :massdiff => mod[1],
-          :variable => 'N',
-          :description => mod[0],
-        }
-        hash[:protein_terminus] = protein_terminus if protein_terminus
-        Ms::Ident::Pepxml::TerminalModification.new(hash)
-      end
-      [static_mods, static_terminal_mods]
-    end
-    # 1. sets aminoacid_modifications and terminal_modifications from a sequest params object
-    # 2. sets @params
-    # 3. sets @masses_by_diff_mod
-    def set_modifications(params, modification_symbols_string)
-      @params = params
-      set_hashes(modification_symbols_string)
-      (static_mods, static_terminal_mods) = create_static_mods(params)
-      aa_hash = params.mass_table
-      #################################
-      # Variable Mods:
-      #################################
-      arr = params.diff_search_options.rstrip.split(/\s+/)
-      # [aa.to_sym, diff.to_f]
-      variable_mods = []
-      (0...arr.size).step(2) do |i|
-        if arr[i].to_f != 0.0
-          variable_mods << [arr[i+1], arr[i].to_f]
-        end
-      end
-      mod_objects = []
-      variable_mods.each do |mod|
-        mod[0].split('').each do |aa|
-          hash = {
-            :aminoacid => aa,
-            :massdiff => mod[1],
-            :mass => aa_hash[aa.to_sym] + mod[1],
-            :variable => 'Y',
-            :binary => 'N',
-            :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
-          }
-          mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
-        end
-      end
-      variable_mods = mod_objects
-      #################################
-      # TERMINAL Variable Mods:
-      #################################
-      # These are always peptide, not protein termini (for sequest)
-      (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
-      to_add = []
-      if nterm_diff != 0.0
-        to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
-      end
-      if cterm_diff != 0.0
-        to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
-      end
-      variable_terminal_mods = to_add.map do |term, mssdiff, symb|
-        hash = {
-          :terminus => term,
-          :massdiff => mssdiff,
-          :variable => 'Y',
-          :symbol => symb,
-        }
-        Ms::Ident::Pepxml::TerminalModification.new(hash)
-      end
-      #########################
-      # COLLECT THEM
-      #########################
-      @aminoacid_modifications = static_mods + variable_mods
-      @terminal_modifications = static_terminal_mods + variable_terminal_mods
-    end
-  end