RubyGems - mspire - Versions diffs - 0.5.0 → 0.6.1 - Mend

mspire 0.5.0 → 0.6.1

Files changed (107) hide show

data/README.rdoc +24 -0
data/Rakefile +51 -0
data/VERSION +1 -0
data/lib/cv/description.rb +18 -0
data/lib/cv/param.rb +33 -0
data/lib/cv.rb +3 -0
data/lib/io/bookmark.rb +13 -0
data/lib/merge.rb +7 -0
data/lib/ms/cvlist.rb +76 -0
data/lib/ms/digester.rb +245 -0
data/lib/ms/fasta.rb +86 -0
data/lib/ms/ident/peptide/db.rb +243 -0
data/lib/ms/ident/peptide.rb +72 -0
data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
data/lib/ms/ident/peptide_hit.rb +26 -0
data/lib/ms/ident/pepxml/modifications.rb +83 -0
data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
data/lib/ms/ident/pepxml/parameters.rb +14 -0
data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
data/lib/ms/ident/pepxml/search_database.rb +49 -0
data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
data/lib/ms/ident/pepxml/search_hit.rb +144 -0
data/lib/ms/ident/pepxml/search_result.rb +35 -0
data/lib/ms/ident/pepxml/search_summary.rb +92 -0
data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
data/lib/ms/ident/pepxml.rb +112 -0
data/lib/ms/ident/protein.rb +33 -0
data/lib/ms/ident/protein_group.rb +80 -0
data/lib/ms/ident/search.rb +114 -0
data/lib/ms/ident.rb +37 -0
data/lib/ms/isotope/aa.rb +59 -0
data/lib/ms/mascot.rb +6 -0
data/lib/ms/mass/aa.rb +79 -0
data/lib/ms/mass.rb +55 -0
data/lib/ms/mzml/index_list.rb +98 -0
data/lib/ms/mzml/plms1.rb +34 -0
data/lib/ms/mzml.rb +197 -0
data/lib/ms/obo.rb +38 -0
data/lib/ms/plms1.rb +156 -0
data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
data/lib/ms/quant/qspec.rb +112 -0
data/lib/ms/spectrum.rb +154 -8
data/lib/ms.rb +3 -10
data/lib/msplat.rb +2 -0
data/lib/obo/ims.rb +5 -0
data/lib/obo/ms.rb +7 -0
data/lib/obo/ontology.rb +41 -0
data/lib/obo/unit.rb +5 -0
data/lib/openany.rb +23 -0
data/lib/write_file_or_string.rb +18 -0
data/obo/ims.obo +562 -0
data/obo/ms.obo +11677 -0
data/obo/unit.obo +2563 -0
data/spec/ms/cvlist_spec.rb +60 -0
data/spec/ms/digester_spec.rb +351 -0
data/spec/ms/fasta_spec.rb +100 -0
data/spec/ms/ident/peptide/db_spec.rb +108 -0
data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
data/spec/ms/ident/pepxml_spec.rb +442 -0
data/spec/ms/ident/protein_group_spec.rb +68 -0
data/spec/ms/mass_spec.rb +8 -0
data/spec/ms/mzml/index_list_spec.rb +122 -0
data/spec/ms/mzml/plms1_spec.rb +62 -0
data/spec/ms/mzml_spec.rb +50 -0
data/spec/ms/plms1_spec.rb +38 -0
data/spec/ms/quant/qspec_spec.rb +25 -0
data/spec/msplat_spec.rb +24 -0
data/spec/obo_spec.rb +25 -0
data/spec/spec_helper.rb +25 -0
data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
data/spec/testfiles/plms1/output.key +0 -0
metadata +157 -40
data/README +0 -77
data/changelog.txt +0 -196
data/lib/ms/calc.rb +0 -32
data/lib/ms/data/interleaved.rb +0 -60
data/lib/ms/data/lazy_io.rb +0 -73
data/lib/ms/data/lazy_string.rb +0 -15
data/lib/ms/data/simple.rb +0 -59
data/lib/ms/data/transposed.rb +0 -41
data/lib/ms/data.rb +0 -57
data/lib/ms/format/format_error.rb +0 -12
data/lib/ms/support/binary_search.rb +0 -126

data/lib/ms/ident/peptide/db.rb ADDED Viewed

@@ -0,0 +1,243 @@
+require 'ms/digester'
+require 'ms/fasta'
+require 'optparse'
+module MS ; end
+module MS::Ident ; end
+module MS::Ident::Peptide ; end
+# the object itself is a modified Hash.
+# It is initialized with the database file and a protein array can be
+# retrieved with the #[] method given an amino acid sequence.  All other
+# methods are untested at this time and should be avoided!
+class MS::Ident::Peptide::Db < Hash
+  MAX_NUM_AA_EXPANSION = 3
+  # the twenty standard amino acids
+  STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)
+  DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => MS::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
+  PROTEIN_DELIMITER = "\t"
+  KEY_VALUE_DELIMITER = ": "
+  def self.cmdline(argv)
+    opt = {
+      :remove_digestion_file => true,
+      :enzyme => MS::Digester[:trypsin]
+    }
+    opts = OptionParser.new do |op|
+      op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
+      op.separator "output: "
+      op.separator "    <file>.msd_clvg<missed_cleavages>.min_aaseq<min_length>.yml"
+      op.separator "format:"
+      op.separator "    PEPTIDE: ID1<tab>ID2<tab>ID3..."
+      op.separator ""
+      op.separator "    Initiator Methionines - by default, will generate two peptides"
+      op.separator "    for any peptide found at the N-termini starting with 'M'"
+      op.separator "    (i.e., one with and one without the leading methionine)"
+      op.separator ""
+      op.on("--missed-cleavages <#{opt[:missed_cleavages]}>", Integer, "max num of missed cleavages") {|v| opt[:missed_cleavages] = v }
+      op.on("--min-length <#{opt[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
+      op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
+      op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
+      op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = MS::Insilico::Digester.const_get(v.upcase) }
+      op.on("--list-enzymes", "lists approved enzymes and exits") do
+        puts MS::Digester::ENZYMES.keys.join("\n")
+        exit
+      end
+    end
+    opts.parse!(argv)
+    if argv.size == 0
+      puts opts || exit
+    end
+    argv.map do |file|
+      MS::Ident::Peptide::Db.peptide_centric_db(file, opt)
+    end
+  end
+  # writes a new file with the added 'min_aaseq<Integer>'
+  # creates a temporary digestion file that contains all peptides digesting
+  # with certain missed_cleavages (i.e., min_seq_length is not applied to
+  # this file but on the final peptide centric db)
+  # returns the full name of the written file.
+  def self.peptide_centric_db(fasta_file, opts={})
+    opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)
+    (missed_cleavages, min_length, enzyme, id_regexp, remove_digestion_file, cleave_initiator_methionine, expand_aa) = opts.values_at(:missed_cleavages, :min_length, :enzyme, :id_regexp, :remove_digestion_file, :cleave_initiator_methionine, :expand_aa)
+    start_time = Time.now
+    print "Digesting #{fasta_file} ..." if $VERBOSE
+    if expand_aa
+      letters_to_expand_re = Regexp.new("[" << Regexp.escape(expand_aa.keys.join) << "]")
+    end
+    base = fasta_file.chomp(File.extname(fasta_file))
+    digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
+    File.open(digestion_file, "w") do |fh|
+      MS::Fasta.open(fasta_file) do |fasta|
+        fasta.each do |prot|
+          peptides = enzyme.digest(prot.sequence, missed_cleavages)
+          if (cleave_initiator_methionine && (prot.sequence[0,1] == "M"))
+            m_peps = []
+            init_methionine_peps = []
+            peptides.each do |pep|
+              # if the peptide is at the beginning of the protein sequence
+              if prot.sequence[0,pep.size] == pep
+                m_peps << pep[1..-1]
+              end
+            end
+            peptides.push(*m_peps)
+          end
+          if expand_aa
+            peptides = peptides.map do |pep|
+              if pep =~ letters_to_expand_re
+                expand_peptides(pep, expand_aa)
+              else
+                pep
+              end
+            end.flatten
+          end
+          fh.puts( prot.header.split(/\s+/).first + "\t" + peptides.join(" ") )
+        end
+      end
+    end
+    puts "#{Time.now - start_time} sec" if $VERBOSE
+    start_time = Time.now
+    print "Organizing raw digestion #{digestion_file} ..." if $VERBOSE
+    hash = Hash.new {|h,k| h[k] = [] }
+    ::IO.foreach(digestion_file) do |line|
+      (prot, *peps) = line.chomp!.split(/\s+/)
+      # prot is something like this: "sp|P31946|1433B_HUMAN" in uniprot
+      peps.each do |pep|
+        if pep.size >= min_length
+          hash[pep] << prot
+        end
+      end
+    end
+    puts "#{Time.now - start_time} sec" if $VERBOSE
+    base = digestion_file.chomp(File.extname(digestion_file))
+    final_outfile = base + ".min_aaseq#{min_length}" + ".yml"
+    start_time = Time.now
+    print "Writing #{hash.size} peptides to #{} ..." if $VERBOSE
+    File.open(final_outfile, 'w') do |out|
+      hash.each do |k,v|
+        out.puts( [k, v.join(PROTEIN_DELIMITER)].join(KEY_VALUE_DELIMITER) )
+      end
+    end
+    puts "#{Time.now - start_time} sec" if $VERBOSE
+    if remove_digestion_file
+      File.unlink(digestion_file)
+    end
+    File.expand_path(final_outfile)
+  end
+  # does combinatorial expansion of all letters requesting it.
+  # expand_aa is hash like: {'X'=>STANDARD_AA}
+  # returns nil if there are more than MAX_NUM_AA_EXPANSION amino acids to
+  # be expanded
+  # returns an empty array if there is no expansion
+  def self.expand_peptides(peptide, expand_aa)
+    letters_in_order = expand_aa.keys.sort
+    index_and_key = []
+    peptide.split('').each_with_index do |char,i|
+      if let_index = letters_in_order.index(char)
+        index_and_key << [i, letters_in_order[let_index]]
+      end
+    end
+    if index_and_key.size > MAX_NUM_AA_EXPANSION
+      return nil
+    end
+    to_expand = [peptide]
+    index_and_key.each do |i,letter|
+      new_peps = []
+      while current_pep = to_expand.shift do
+        new_peps << expand_aa[letter].map {|v| dp = current_pep.dup ; dp[i] = v ; dp }
+      end
+      to_expand = new_peps.flatten
+    end
+    to_expand
+  end
+  def initialize(db_file)
+    self.replace(YAML.load_file(db_file))
+  end
+  alias_method :old_bracket, '[]'.to_sym
+  # returns the protein id's as an array
+  def [](key)
+    old_bracket(key).chomp.split(PROTEIN_DELIMITER)
+  end
+  # an object for on disk retrieval of db entries
+  # proteins are returned as an array.
+  # behaves much like a hash once it is opened.
+  class IO
+    include Enumerable
+    def self.open(filename, &block)
+      raise ArgumentError unless block
+      File.open(filename) do |io|
+        block.call(self.new(io))
+      end
+    end
+    attr_accessor :io
+    attr_accessor :index
+    def initialize(io)
+      @io = io
+      @index = {}
+      re = /^(\w+)#{Regexp.escape(KEY_VALUE_DELIMITER)}/
+        prev_io_pos = io.pos
+      triplets = io.each_line.map do |line|
+        key = re.match(line)[1]
+        [key, prev_io_pos + key.bytesize+KEY_VALUE_DELIMITER.bytesize, prev_io_pos=io.pos]
+      end
+      triplets.each do |key, start, end_pos|
+        @index[key] = [start, end_pos-start]
+      end
+    end
+    # returns an array of proteins for the given key (peptide aaseq)
+    def [](key)
+      (start, length) = @index[key]
+      return nil unless start
+      @io.seek(start)
+      string = @io.read(length)
+      string.chomp!
+      string.split("\t")
+    end
+    # number of entries
+    def size ; @index.size end
+    alias_method :length, :size
+    def keys
+      @index.keys
+    end
+    # all the protein lists
+    def values
+      keys.map {|key| self[key] }
+    end
+    # yields a pair of aaseq and protein array
+    def each(&block)
+      @index.each do |key, start_length|
+        block.call([key, self[key]])
+      end
+    end
+  end
+end

data/lib/ms/ident/peptide.rb ADDED Viewed

@@ -0,0 +1,72 @@
+module MS ; end
+module MS::Ident ; end
+# A 'sequence' is a notation of a peptide that includes the leading and
+# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
+# and may contain post-translational modification information.
+#
+# 'aaseq' is the amino acid sequence of just the peptide with no leading or
+# trailing notation (e.g., PEPTIDER or LAKKLY)
+module MS::Ident::Peptide
+  Nonstandard_AA_re = /[^A-Z\.\-]/
+  class << self
+    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
+    # amino acids, and returns the center piece
+    def sequence_to_aaseq(sequence)
+      after_removed = remove_non_amino_acids(sequence)
+      pieces = after_removed.split('.')
+      case pieces.size
+      when 3
+        pieces[1]
+      when 2
+        if pieces[0].size > 1  ## N termini
+          pieces[0]
+        else  ## C termini
+          pieces[1]
+        end
+      when 1  ## this must be a parse error!
+        pieces[0] ## which is the peptide itself
+      else
+        abort "bad peptide sequence: #{sequence.inspect}"
+      end
+    end
+    # removes non standard amino acids specified by Nonstandard_AA_re
+    def remove_non_amino_acids(sequence)
+      sequence.gsub(Nonstandard_AA_re, '')
+    end
+    # remove non amino acids and split the sequence
+    def prepare_sequence(sequence)
+      nv = remove_non_amino_acids(sequence)
+      split_sequence(nv)
+    end
+    # Returns prev, peptide, next from sequence.  Parse errors return
+    # nil,nil,nil
+    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
+    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
+    #   PEPTIDE.A    # -> -, PEPTIDE, A
+    #   A.PEPTIDE    # -> A, PEPTIDE, -
+    #   PEPTIDE      # -> nil,nil,nil
+    def split_sequence(sequence)
+      pieces = sequence.split('.')
+      case pieces.size
+      when 3
+        pieces
+      when 2
+        if pieces[0].size > 1  ## N termini
+          ['-', pieces[0], pieces[1]]
+        else  ## C termini
+          [pieces[0], pieces[1], '-']
+        end
+      when 1  ## this must be a parse error!
+        [nil,nil,nil]
+      when 0
+        [nil,nil,nil]
+      end
+    end
+  end
+end

data/lib/ms/ident/peptide_hit/qvalue.rb ADDED Viewed

@@ -0,0 +1,56 @@
+require 'ms/ident/search'
+require 'ms/ident/peptide_hit'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::PeptideHit
+  module Qvalue
+    FILE_EXTENSION = '.phq.tsv'
+    FILE_DELIMITER = "\t"
+    HEADER = %w(run_id id aaseq charge qvalue)
+    class << self
+      # writes to the file, adding an extension
+      def to_phq(base, hits, qvalues=[])
+        to_file(base + FILE_EXTENSION, hits, qvalues)
+      end
+      # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
+      # to hits that can provide qvalues if not inherent to the hits
+      # returns the filename.
+      def to_file(filename, hits, qvalues=[])
+        File.open(filename,'w') do |out|
+          out.puts HEADER.join(FILE_DELIMITER)
+          hits.zip(qvalues) do |hit, qvalue|
+            out.puts [hit.search.id, hit.id, hit.aaseq, hit.charge, qvalue || hit.qvalue].join(FILE_DELIMITER)
+          end
+        end
+        filename
+      end
+      # returns an array of PeptideHit objects from a phq.tsv
+      def from_file(filename)
+        searches = Hash.new {|h,id|  h[id] = MS::Ident::Search.new(id) }
+        peptide_hits = []
+        File.open(filename) do |io|
+          header = io.readline.chomp.split(FILE_DELIMITER)
+          raise "bad headers" unless header == HEADER
+          io.each do |line|
+            line.chomp!
+            (run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
+            ph = MS::Ident::PeptideHit.new
+            ph.search = searches[run_id]
+            ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
+            peptide_hits << ph
+          end
+        end
+        peptide_hits
+      end
+      alias_method :from_phq, :from_file
+    end
+  end # Qvalue
+end # Peptide Hit

data/lib/ms/ident/peptide_hit.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'merge'
+module MS ; end
+module MS::Ident ; end
+module MS::Ident::PeptideHitLike
+  attr_accessor :id
+  attr_accessor :search
+  attr_accessor :missed_cleavages
+  attr_accessor :aaseq
+  attr_accessor :charge
+  # an array of MS::Ident::ProteinLike objects
+  attr_accessor :proteins
+  # relative to the set the hit is contained in!
+  attr_accessor :qvalue
+end
+class MS::Ident::PeptideHit
+  include MS::Ident::PeptideHitLike
+  include Merge
+  def initialize(hash)
+    merge!(hash)
+  end
+end

data/lib/ms/ident/pepxml/modifications.rb ADDED Viewed

@@ -0,0 +1,83 @@
+require 'merge'
+require 'nokogiri'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml ; end
+# Modified aminoacid, static or variable
+# unless otherwise stated, all attributes can be anything
+class MS::Ident::Pepxml::AminoacidModification
+  include Merge
+  # The amino acid (one letter code)
+  attr_accessor :aminoacid
+  # Mass difference with respect to unmodified aminoacid, as a Float
+  attr_accessor :massdiff
+  # Mass of modified aminoacid, Float
+  attr_accessor :mass
+  # Y if both modified and unmodified aminoacid could be present in the
+  # dataset, N if only modified aminoacid can be present
+  attr_accessor :variable
+  # whether modification can reside only at protein terminus (specified 'n',
+  # 'c', or 'nc')
+  attr_accessor :peptide_terminus
+  # Symbol used by search engine to designate this modification
+  attr_accessor :symbol
+  # 'Y' if each peptide must have only modified or unmodified aminoacid, 'N' if a
+  # peptide may contain both modified and unmodified aminoacid
+  attr_accessor :binary
+  def initialize(hash={})
+    merge!(hash)
+  end
+  # returns the builder or an xml string if no builder supplied
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    # note massdiff: must begin with either + (nonnegative) or - [e.g.
+    # +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
+    # MS::Ident::Pepxml
+    attrs = [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].map {|at| v=send(at) ; [at,v] if v }.compact
+    hash = Hash[attrs]
+    hash[:massdiff] = hash[:massdiff].to_plus_minus_string
+    xmlb.aminoacid_modification(hash)
+    builder || xmlb.doc.root.to_xml
+  end
+end
+# Modified aminoacid, static or variable
+class MS::Ident::Pepxml::TerminalModification
+  include Merge
+  # n for N-terminus, c for C-terminus
+  attr_accessor :terminus
+  # Mass difference with respect to unmodified terminus
+  attr_accessor :massdiff
+  # Mass of modified terminus
+  attr_accessor :mass
+  # Y if both modified and unmodified terminus could be present in the
+  # dataset, N if only modified terminus can be present
+  attr_accessor :variable
+  # MSial symbol used by search engine to designate this modification
+  attr_accessor :symbol
+  # whether modification can reside only at protein terminus (specified n or
+  # c)
+  attr_accessor :protein_terminus
+  attr_accessor :description
+  def initialize(hash={})
+    hash.each {|k,v| send("#{k}=", v) }
+  end
+  # returns the builder or an xml string if no builder supplied
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    #short_element_xml_from_instance_vars("terminal_modification")
+   attrs = [:terminus, :massdiff, :mass, :variable, :protein_terminus, :description].map {|at| v=send(at) ; [at,v] if v }
+   hash = Hash[attrs]
+    hash[:massdiff] = hash[:massdiff].to_plus_minus_string
+    xmlb.terminal_modification(hash)
+    builder || xmlb.doc.root.to_xml
+  end
+end

data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb ADDED Viewed

@@ -0,0 +1,70 @@
+require 'merge'
+require 'ms/ident/pepxml/msms_run_summary'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml; end
+class MS::Ident::Pepxml::MsmsPipelineAnalysis
+  include Merge
+  XMLNS = "http://regis-web.systemsbiology.net/pepXML"
+  XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
+  # (this doesn't actually exist), also, the space is supposed to be there
+  XSI_SCHEMA_LOCATION_BASE = "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v"
+  # the only additions concerning a writer are from v18 are to the 'spectrum': retention_time_sec and activationMethodType
+  PEPXML_VERSION = 115
+  #include SpecIDXML
+  # Version 1.2.3
+  #attr_writer :date
+  #attr_writer :xmlns, :xmlns_xsi, :xsi_schemaLocation
+  #attr_accessor :summary_xml
+  attr_accessor :xmlns
+  attr_accessor :xmlns_xsi
+  attr_accessor :xsi_schema_location
+  # an Integer
+  attr_accessor :pepxml_version
+  # self referential path to the outputfile
+  attr_accessor :summary_xml
+  attr_accessor :msms_run_summary
+  attr_writer :date
+  def block_arg
+    @msms_run_summary = MS::Ident::Pepxml::MsmsRunSummary.new
+  end
+  # if block given, yields a new msms_run_summary to return value of block
+  def initialize(hash={}, &block)
+    @xmlns = XMLNS
+    @xmlns_xsi = XMLNS_XSI
+    @xsi_schema_location = xsi_schema_location
+    @pepxml_version = PEPXML_VERSION
+    merge!(hash, &block)
+  end
+  # returns the location based on the pepxml version number
+  def xsi_schema_location
+    XSI_SCHEMA_LOCATION_BASE + pepxml_version.to_s + '.xsd'
+  end
+  # if no date string given, then it will set to Time.now
+  def date
+    return @date if @date
+    tarr = Time.now.to_a
+    tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
+  end
+  # uses the filename as summary_xml (if it is nil) attribute and builds a complete, valid xml document,
+  # writing it to the filename
+  def to_xml(builder)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    xmlb.msms_pipeline_analysis(:date => date, :xmlns => xmlns, 'xsi:schemaLocation'.to_sym => xsi_schema_location, :summary_xml => summary_xml) do |xmlb|
+      msms_run_summary.to_xml(xmlb) if msms_run_summary
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+end

data/lib/ms/ident/pepxml/msms_run_summary.rb ADDED Viewed

@@ -0,0 +1,82 @@
+require 'merge'
+require 'nokogiri'
+require 'ms/ident/pepxml/sample_enzyme'
+require 'ms/ident/pepxml/search_summary'
+require 'ms/ident/pepxml/spectrum_query'
+module MS ; end
+module MS::Ident ; end
+class MS::Ident::Pepxml; end
+class MS::Ident::Pepxml::MsmsRunSummary
+  include Merge
+  # The name of the pep xml file without any extension
+  attr_accessor :base_name
+  # The name of the mass spec manufacturer
+  attr_accessor :ms_manufacturer
+  attr_accessor :ms_model
+  attr_accessor :ms_mass_analyzer
+  attr_accessor :ms_detector
+  attr_accessor :raw_data_type
+  attr_accessor :raw_data
+  attr_accessor :ms_ionization
+  attr_accessor :pepxml_version
+  # A SampleEnzyme object (responds to: name, cut, no_cut, sense)
+  attr_accessor :sample_enzyme
+  # A SearchSummary object
+  attr_accessor :search_summary
+  # An array of spectrum_queries
+  attr_accessor :spectrum_queries
+  def block_arg
+    [@sample_enzyme = MS::Ident::Pepxml::SampleEnzyme.new,
+      @search_summary = MS::Ident::Pepxml::SearchSummary.new,
+      @spectrum_queries ]
+  end
+  # takes a hash of name, value pairs
+  # if block given, yields a SampleEnzyme object, a SearchSummary and an array
+  # for SpectrumQueries
+  def initialize(hash={}, &block)
+    @spectrum_queries = []
+    merge!(hash, &block)
+    block.call(block_arg) if block
+  end
+  # optionally takes an xml builder object and returns the builder, or the xml
+  # string if no builder was given
+  # sets the index attribute of each spectrum query if it is not already set
+  def to_xml(builder=nil)
+    xmlb = builder || Nokogiri::XML::Builder.new
+    hash = {:base_name => base_name, :msManufacturer => ms_manufacturer, :msModel => ms_model, :msIonization => ms_ionization, :msMassAnalyzer => ms_mass_analyzer, :msDetector => ms_detector, :raw_data_type => raw_data_type, :raw_data => raw_data}
+    hash.each {|k,v| hash.delete(k) unless v }
+    xmlb.msms_run_summary(hash) do |xmlb|
+      sample_enzyme.to_xml(xmlb) if sample_enzyme
+      search_summary.to_xml(xmlb) if search_summary
+      spectrum_queries.each_with_index do |sq,i|
+        sq.index = i+1 unless sq.index
+        sq.to_xml(xmlb)
+      end
+    end
+    builder || xmlb.doc.root.to_xml
+  end
+  def self.from_pepxml_node(node)
+    self.new.from_pepxml_node(node)
+  end
+  # peps correspond to search_results
+  def from_pepxml_node(node)
+    @base_name = node['base_name']
+    @ms_manufacturer = node['msManufacturer']
+    @ms_model = node['msModel']
+    @ms_manufacturer = node['msIonization']
+    @ms_mass_analyzer = node['msMassAnalyzer']
+    @ms_detector = node['msDetector']
+    @raw_data_type = node['raw_data_type']
+    @raw_data = node['raw_data']
+    self
+  end
+end

data/lib/ms/ident/pepxml/parameters.rb ADDED Viewed

@@ -0,0 +1,14 @@
+module MS
+  module Ident
+    class Pepxml
+      class Parameters < Hash
+        def to_xml(builder)
+          self.each do |k,v|
+            builder.parameter(:name => k, :value => v)
+          end
+        end
+      end
+    end
+  end
+end