ms-ident 0.0.3 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.0.17
@@ -0,0 +1,75 @@
1
+
2
module Ms ; end
module Ms::Ident ; end

# A 'sequence' is a notation of a peptide that includes the leading and
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
# and may contain post-translational modification information.
#
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
# trailing notation (e.g., PEPTIDER or LAKKLY)
module Ms::Ident::Peptide
  # Matches any character that is not an uppercase letter, a period or a dash.
  Nonstandard_AA_re = /[^A-Z\.\-]/

  class << self

    # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
    # amino acids, and returns the center piece (the aaseq).
    #
    # When only two period-separated pieces are present, the first piece is
    # taken to be the peptide if it is longer than one character (e.g.
    # 'PEPTIDE.A'); otherwise the second piece is (e.g. 'A.PEPTIDE').  A
    # string without any period is returned as is (after cleaning).
    #
    # NOTE: any other shape (nothing left after cleaning, or more than three
    # pieces) terminates the process through Kernel#abort.
    def sequence_to_aaseq(sequence)
      pieces = remove_non_amino_acids(sequence).split('.')
      case pieces.size
      when 3
        pieces[1]
      when 2
        # a multi-character first piece means the leading flank is missing
        pieces[0].size > 1 ? pieces[0] : pieces[1]
      when 1 ## this must be a parse error!
        pieces[0] ## which is the peptide itself
      else
        abort "bad peptide sequence: #{sequence.inspect}"
      end
    end

    # removes non standard amino acids specified by Nonstandard_AA_re
    # (returns a new string; the argument is not modified)
    def remove_non_amino_acids(sequence)
      sequence.gsub(Nonstandard_AA_re, '')
    end

    # remove non amino acids and split the sequence (see split_sequence for
    # the return value)
    def prepare_sequence(sequence)
      split_sequence(remove_non_amino_acids(sequence))
    end

    # Returns prev, peptide, next from sequence.  Parse errors return
    # nil,nil,nil
    #   R.PEPTIDE.A  # -> R, PEPTIDE, A
    #   R.PEPTIDE.-  # -> R, PEPTIDE, -
    #   PEPTIDE.A    # -> -, PEPTIDE, A
    #   A.PEPTIDE    # -> A, PEPTIDE, -
    #   PEPTIDE      # -> nil,nil,nil
    #   A.B.C.D      # -> nil,nil,nil
    #
    # The sequence is split as given: non-standard characters are NOT
    # removed here (use prepare_sequence for that).
    def split_sequence(sequence)
      pieces = sequence.split('.')
      case pieces.size
      when 3
        pieces
      when 2
        if pieces[0].size > 1 ## N termini
          ['-', pieces[0], pieces[1]]
        else ## C termini
          [pieces[0], pieces[1], '-']
        end
      else
        # zero or one piece, or more than three: always hand back a triple so
        # callers can rely on the documented return shape
        [nil, nil, nil]
      end
    end
  end

end
@@ -5,17 +5,6 @@ module Ms ; end
5
5
  module Ms::Ident ; end
6
6
  class Ms::Ident::Pepxml ; end
7
7
 
8
- # holds a list of AminoacidModification and TerminalModification objects.
9
- class Ms::Ident::Pepxml::Modifications < Array
10
- ## Generates the pepxml for static and differential amino acid mods based on
11
- ## sequest object
12
- def to_xml(builder=nil)
13
- xmlb = builder || Nokogiri::XML::Builder.new
14
- self.each {|mod| mod.to_xml(xmlb) }
15
- builder || xmlb.doc.root.to_xml
16
- end
17
- end
18
-
19
8
  # Modified aminoacid, static or variable
20
9
  # unless otherwise stated, all attributes can be anything
21
10
  class Ms::Ident::Pepxml::AminoacidModification
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require 'ms/ident/pepxml/sample_enzyme'
5
5
  require 'ms/ident/pepxml/search_summary'
6
+ require 'ms/ident/pepxml/spectrum_query'
6
7
 
7
8
  module Ms ; end
8
9
  module Ms::Ident ; end
@@ -1,4 +1,6 @@
1
1
  require 'merge'
2
+ require 'strscan'
3
+
2
4
  module Ms ; end
3
5
  module Ms::Ident ; end
4
6
  class Ms::Ident::Pepxml ; end
@@ -56,21 +58,11 @@ class Ms::Ident::Pepxml::SampleEnzyme
56
58
  def self.from_pepxml_node(node)
57
59
  self.new.from_pepxml_node(node)
58
60
  end
59
- end
60
-
61
- ###################################################
62
- ###################################################
63
- ###################################################
64
- ###################################################
65
- # This is digestion methodology:
66
-
67
- =begin
68
61
 
69
- require 'strscan'
70
-
71
- # takes an amino acid sequence (e.g., -.PEPTIDK.L)
62
+ # takes an amino acid sequence (e.g. PEPTIDE).
72
63
  # returns the number of missed cleavages
73
64
  def num_missed_cleavages(aaseq)
65
+ seq_to_scan = ' ' + aaseq + ' '
74
66
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
75
67
  @num_missed_cleavages_regex =
76
68
  if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
@@ -89,23 +81,30 @@ require 'strscan'
89
81
  num
90
82
  end
91
83
 
92
- # requires full sequence (with heads and tails)
93
- def num_tol_term(sequence)
84
+ # No arguments should contain non-standard amino acids
85
+ def num_tol_term(prev_aa, middle, next_aa)
94
86
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
95
87
  no_cut = @no_cut || ''
96
88
  num_tol = 0
97
- first, middle, last = SpecID::Pep.split_sequence(sequence)
98
89
  last_of_middle = middle[-1,1]
99
90
  first_of_middle = middle[0,1]
100
- if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
91
+ if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
101
92
  num_tol += 1
102
93
  end
103
- if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
94
+ if @cut.include?(last_of_middle) && !no_cut.include?(next_aa) || next_aa == '-'
104
95
  num_tol += 1
105
96
  end
106
97
  num_tol
107
98
  end
99
+ end
100
+
101
+ ###################################################
102
+ ###################################################
103
+ ###################################################
104
+ ###################################################
105
+ # This is digestion methodology:
108
106
 
107
+ =begin
109
108
  # returns all peptides of missed cleavages <= 'missed_cleavages'
110
109
  # so 2 missed cleavages will return all no missed cleavage peptides
111
110
  # all 1 missed cleavages and all 2 missed cleavages.
@@ -1,3 +1,4 @@
1
+ require 'ms/fasta'
1
2
  require 'merge'
2
3
  module Ms ; end
3
4
  module Ms::Ident ; end
@@ -25,13 +26,19 @@ class Ms::Ident::Pepxml
25
26
  def initialize(hash={}, get_size_of_residues=false)
26
27
  merge!(hash)
27
28
  if get_size_of_residues && File.exist?(@local_path)
28
- @size_of_residues = 0
29
- Ms::Fasta.foreach(@local_path) do |entry|
30
- @size_of_residues += entry.sequence.size
31
- end
29
+ set_size_of_residues!
32
30
  end
33
31
  end
34
32
 
33
+ # returns self for chaining
34
+ def set_size_of_residues!
35
+ @size_of_residues = 0
36
+ Ms::Fasta.foreach(@local_path) do |entry|
37
+ @size_of_residues += entry.sequence.size
38
+ end
39
+ self
40
+ end
41
+
35
42
  def to_xml(builder)
36
43
  attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
37
44
  builder.search_database(Hash[attrs])
@@ -31,16 +31,11 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
31
31
  # position ranges from 1 to peptide length
32
32
  #attr_accessor :mod_aminoacid_masses
33
33
 
34
- class << self
35
- alias_method :old_new, :new
36
- # takes either a hash or the normal list of values to set.
37
- def new(*args)
38
- if args.first.is_a?(Hash)
39
- args = args.first.values_at(*members)
40
- end
41
- obj = old_new(*args)
42
- obj
34
+ def initialize(*args)
35
+ if args.first.is_a?(Hash)
36
+ args = args.first.values_at(*members)
43
37
  end
38
+ super(*args)
44
39
  end
45
40
 
46
41
  # Will escape any xml special chars in modified_peptide
@@ -50,11 +45,12 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
50
45
  ## Create the attribute string:
51
46
  atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
52
47
  atts.map! {|at| (v=send(at)) && [at, v] }.compact
53
- xmlb.modification_info(Hash[atts]) do
48
+ xmlb.modification_info(Hash[atts]) do |xmlb|
54
49
  mod_aminoacid_masses.andand.each do |mod_aa_mass|
55
50
  mod_aa_mass.to_xml(xmlb)
56
51
  end
57
52
  end
53
+ builder || xmlb.doc.root.to_s
58
54
  end
59
55
 
60
56
  def self.from_pepxml_node(node)
@@ -78,5 +74,6 @@ end
78
74
  Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
79
75
  def to_xml(builder)
80
76
  builder.mod_aminoacid_mass(:position => position, :mass => mass)
77
+ builder
81
78
  end
82
79
  end
@@ -1,5 +1,7 @@
1
1
  require 'nokogiri'
2
2
 
3
+ require 'ms/ident/pepxml/search_hit'
4
+
3
5
  module Ms ; end
4
6
  module Ms::Ident ; end
5
7
  class Ms::Ident::Pepxml ; end
@@ -8,9 +10,14 @@ class Ms::Ident::Pepxml::SearchResult
8
10
  # an array of search_hits
9
11
  attr_accessor :search_hits
10
12
 
11
- # if block given, then yields an empty search_hits array
13
+ # if block given, then yields an empty search_hits array.
14
+ # For consistency with other objects, will also take a hash that has the key
15
+ # :search_hits and the value an array.
12
16
  def initialize(search_hits = [], &block)
13
17
  @search_hits = search_hits
18
+ if search_hits.is_a?(Hash)
19
+ @search_hits = search_hits[:search_hits]
20
+ end
14
21
  block.call(@search_hits) if block
15
22
  end
16
23
 
@@ -30,7 +30,7 @@ class Ms::Ident::Pepxml::SearchSummary
30
30
  attr_accessor :out_data
31
31
  # by default, "1"
32
32
  attr_accessor :search_id
33
- # a Modifications object
33
+ # an array of Ms::Ident::Pepxml::Modification objects
34
34
  attr_accessor :modifications
35
35
  # A SearchDatabase object (responds to :local_path and :type)
36
36
  attr_accessor :search_database
@@ -49,12 +49,14 @@ class Ms::Ident::Pepxml::SearchSummary
49
49
  def block_arg
50
50
  [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
51
51
  @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
52
- @modifications = Ms::Ident::Pepxml::Modifications.new,
52
+ @modifications,
53
53
  @parameters = Ms::Ident::Pepxml::Parameters.new,
54
54
  ]
55
55
  end
56
56
 
57
+ # initializes modifications to an empty array
57
58
  def initialize(hash={}, &block)
59
+ @modifications = []
58
60
  @search_id = DEFAULT_SEARCH_ID
59
61
  merge!(hash, &block)
60
62
  end
@@ -68,7 +70,9 @@ class Ms::Ident::Pepxml::SearchSummary
68
70
  builder.search_summary(hash) do |xmlb|
69
71
  search_database.to_xml(xmlb)
70
72
  xmlb.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
71
- modifications.to_xml(xmlb) if modifications
73
+ modifications.each do |mod|
74
+ mod.to_xml(xmlb)
75
+ end
72
76
  parameters.to_xml(xmlb) if parameters
73
77
  end
74
78
  builder || xmlb.doc.root.to_xml
@@ -2,6 +2,8 @@ require 'nokogiri'
2
2
  require 'ms/mass'
3
3
  require 'merge'
4
4
 
5
+ require 'ms/ident/pepxml/search_result'
6
+
5
7
  module Ms ; end
6
8
  module Ms::Ident ; end
7
9
  class Ms::Ident::Pepxml ; end
@@ -43,19 +43,46 @@ class Ms::Ident::Pepxml
43
43
  doc
44
44
  end
45
45
 
46
- # writes xml file named msms_pipeline_analysis.summary_xml into the msms_run_summary.base_name directory
47
- def to_xml_file
48
- to_xml(File.dirname(msms_pipeline_analysis.msms_run_summary.base_name) + '/' + msms_pipeline_analysis.summary_xml)
49
- end
46
+ # if no options are given, an xml string is returned. If either :outdir or
47
+ # :outfile is given, the xml is written to file and the output filename is returned.
48
+ # A single string argument will be interpreted as :outfile if it ends in
49
+ # '.xml' and as :outdir otherwise. In this case, :update_summary_xml still defaults to true.
50
+ #
51
+ # options:
52
+ #
53
+ # arg default
54
+ # :outdir => nil write to disk using this outdir with summary_xml basename
55
+ # :outfile => nil write to this filename (overrides outdir)
56
+ # :update_summary_xml => true update summary_xml attribute to point to the output file true/false
57
+ #
58
+ # set outdir to
59
+ # File.dirname(pepxml_obj.msms_pipeline_analysis.msms_run_summary.base_name)
60
+ # to write to the same directory as the input search file.
61
+ def to_xml(opts={})
62
+ opts ||= {}
63
+ if opts.is_a?(String)
64
+ opts = ( opts.match(/\.xml$/) ? {:outfile => opts} : {:outdir => opts } )
65
+ end
66
+ opt = {:update_summary_xml => true, :outdir => nil, :outfile => nil}.merge(opts)
67
+
68
+ if opt[:outfile]
69
+ outfile = opt[:outfile]
70
+ elsif opt[:outdir]
71
+ outfile = File.join(opt[:outdir], msms_pipeline_analysis.summary_xml.split(/[\/\\]/).last)
72
+ end
73
+ self.msms_pipeline_analysis.summary_xml = File.expand_path(outfile) if (opt[:update_summary_xml] && outfile)
50
74
 
51
- # if no outfile is given, an xml string is returned. summary_xml should
52
- # have already been set and is not influenced by the outfile given here.
53
- def to_xml(outfile=nil)
54
75
  builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
55
76
  msms_pipeline_analysis.to_xml(builder)
56
77
  add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
57
78
  string = builder.doc.to_xml
58
- outfile ? File.open(outfile,'w') {|out| out.print(string) } : string
79
+
80
+ if outfile
81
+ File.open(outfile,'w') {|out| out.print(string) }
82
+ outfile
83
+ else
84
+ string
85
+ end
59
86
  end
60
87
  end
61
88
 
@@ -0,0 +1,17 @@
1
+
2
module Ms ; end
module Ms::Ident ; end

# Mixin for protein-like objects.  The including object must respond to
# :reference and return a String.
module Ms::Ident::Protein

  # gives the information up until the first space or carriage return (any
  # whitespace character ends the entry).
  # Assumes the protein can respond_to? :reference
  #
  # Leading whitespace is skipped so that a padded reference still yields
  # its first token rather than an empty string.  Returns nil when the
  # reference is empty or contains only whitespace.
  def first_entry
    reference[/\S+/]
  end

end
17
+
@@ -0,0 +1,105 @@
1
+
2
module Ms
  module Ident

    # Mixin for an object representing the results of one search.
    module Search
      attr_accessor :proteins
      attr_accessor :peptides

      # returns an array of peptide_hits and protein_hits that are linked to
      # one another: [all_peptide_hits, protein_hits].  NOTE: this will update
      # the :peptides attribute of the protein hits it creates.  Assumes that
      # each peptide responds to :proteins, and each protein to :reference and
      # :peptides.  Can be done on a single file to restore protein/peptide
      # linkages to their original single-file state.
      #
      # A block is required.  The first time a protein reference is seen the
      # block is called with (protein, [peptide]) -- the protein being the
      # template for a new protein -- and must return the new protein hit.
      # Peptides seen later for the same reference are appended to that new
      # protein hit's :peptides.
      def merge!(ar_of_peptide_hit_arrays)
        all_peptide_hits = []
        reference_hash = {}
        ar_of_peptide_hit_arrays.each do |peptide_hits|
          all_peptide_hits.push(*peptide_hits)
          peptide_hits.each do |peptide|
            peptide.proteins.each do |protein|
              ref = protein.reference
              if reference_hash.key?(ref)
                reference_hash[ref].peptides << peptide
              else
                reference_hash[ref] = yield(protein, [peptide])
              end
            end
          end
        end
        [all_peptide_hits, reference_hash.values]
      end

    end


    # Mixin for an object that groups several searches.
    module SearchGroup
      include Search

      # an array of search objects
      attr_accessor :searches

      # the group's file extension (with no leading period)
      def extension
        'grp'
      end

      # The class instantiated for every search filename.
      # NOTE(review): the default (Search) is a module and cannot be
      # instantiated -- including classes presumably override this method.
      def search_class
        Search
      end

      # a simple formatted file with paths to the search files.  Returns the
      # chomped lines of file, skipping lines without any word character and
      # lines starting with '#'.
      def to_paths(file)
        IO.readlines(file).grep(/\w/).reject {|v| v =~ /^#/ }.map {|v| v.chomp }
      end

      # Adds a search for every path listed in file (see to_paths).  opts is
      # handed on to from_filenames.
      def from_file(file, opts={})
        from_filenames(to_paths(file), opts)
      end

      # Creates a search_class object for each filename and appends it to
      # @searches.  A non-empty opts hash is passed to the search_class
      # initializer as a second argument; with empty opts only the filename
      # is passed.  Aborts the process if a file does not exist.
      def from_filenames(filenames, opts={})
        filenames.each do |file|
          unless File.exist?(file)
            abort "File: #{file} does not exist!\n" +
              "perhaps you need to modify the file with file paths"
          end
          @searches << (opts.empty? ? search_class.new(file) : search_class.new(file, opts))
        end
      end

      # takes one of:
      #
      #   * a single group filename (ending in '.' + extension) that lists
      #     the paths to the search files
      #   * an array of search filenames
      #   * an array of search objects (used as is)
      #
      # opts are passed to the initializer of each search created from a
      # filename.  Raises an ArgumentError for any other kind of argument.
      # the optional block yields the object for further processing
      def initialize(arg=nil, opts={})
        @peptides = []
        @reference_hash = {}
        @searches = []

        if arg
          if arg.is_a?(String) && arg.end_with?(".#{extension}")
            from_file(arg, opts)
          elsif arg.is_a?(Array) && arg.first.is_a?(String)
            from_filenames(arg, opts)
          elsif arg.is_a?(Array)
            @searches = arg
          else
            raise ArgumentError, "must be file, array of filenames, or array of objs"
          end
        end
        yield(self) if block_given?
      end

    end
  end
end
@@ -48,6 +48,86 @@ describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
48
48
  end
49
49
  end
50
50
 
51
+ describe 'an Ms::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
52
+ before do
53
+ @full_KRP = Ms::Ident::Pepxml::SampleEnzyme.new(
54
+ :name => 'trypsin',
55
+ :cut => 'KR',
56
+ :no_cut => 'P',
57
+ :sense => 'C',
58
+ )
59
+ @just_KR = Ms::Ident::Pepxml::SampleEnzyme.new(
60
+ :name => 'trypsin',
61
+ :cut => 'KR',
62
+ :no_cut => '',
63
+ :sense => 'C',
64
+ )
65
+ end
66
+
67
+ it 'calculates the number of tolerant termini' do
68
+ exp = [{
69
+ # full KR/P
70
+ %w(K EPTIDR E) => 2,
71
+ %w(K PEPTIDR E) => 1,
72
+ %w(F EEPTIDR E) => 1,
73
+ %w(F PEPTIDW R) => 0,
74
+ },
75
+ {
76
+ # just KR
77
+ %w(K EPTIDR E) => 2,
78
+ %w(K PEPTIDR E) => 2,
79
+ %w(F EEPTIDR E) => 1,
80
+ %w(F PEPTIDW R) => 0,
81
+ }
82
+ ]
83
+ sample_enzyme_ar = [@full_KRP, @just_KR]
84
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
85
+ hash.each do |seq, val|
86
+ sample_enzyme.num_tol_term(*seq).should == val
87
+ end
88
+ end
89
+ end
90
+
91
+ it 'calculates number of missed cleavages' do
92
+ exp = [{
93
+ "EPTIDR" => 0,
94
+ "PEPTIDR" => 0,
95
+ "EEPTIDR" => 0,
96
+ "PEPTIDW" => 0,
97
+ "PERPTIDW" => 0,
98
+ "PEPKPTIDW" => 0,
99
+ "PEPKTIDW" => 1,
100
+ "RTTIDR" => 1,
101
+ "RTTIKK" => 2,
102
+ "PKEPRTIDW" => 2,
103
+ "PKEPRTIDKP" => 2,
104
+ "PKEPRAALKPEERPTIDKW" => 3,
105
+ },
106
+ {
107
+ "EPTIDR" => 0,
108
+ "PEPTIDR" => 0,
109
+ "EEPTIDR" => 0,
110
+ "PEPTIDW" => 0,
111
+ "PERPTIDW" => 1,
112
+ "PEPKPTIDW" => 1,
113
+ "PEPKTIDW" => 1,
114
+ "RTTIDR" => 1,
115
+ "RTTIKK" => 2,
116
+ "PKEPRTIDW" => 2,
117
+ "PKEPRTIDKP" => 3,
118
+ "PKEPRAALKPEERPTIDKW" => 5,
119
+ }
120
+ ]
121
+
122
+ sample_enzyme_ar = [@full_KRP, @just_KR]
123
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
124
+ hash.each do |aaseq, val|
125
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
126
+ end
127
+ end
128
+ end
129
+ end
130
+
51
131
  xdescribe 'read in from an xml node' do
52
132
  # placeholder until written
53
133
  end
@@ -93,86 +173,6 @@ end
93
173
 
94
174
  describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
95
175
 
96
- before(:each) do
97
- @full_KRP = SampleEnzyme.new do |se|
98
- se.name = 'trypsin'
99
- se.cut = 'KR'
100
- se.no_cut = 'P'
101
- se.sense = 'C'
102
- end
103
- @just_KR = SampleEnzyme.new do |se|
104
- se.name = 'trypsin'
105
- se.cut = 'KR'
106
- se.no_cut = ''
107
- se.sense = 'C'
108
- end
109
- end
110
-
111
- it 'calculates the number of tolerant termini' do
112
- exp = [{
113
- # full KR/P
114
- 'K.EPTIDR.E' => 2,
115
- 'K.PEPTIDR.E' => 1,
116
- 'F.EEPTIDR.E' => 1,
117
- 'F.PEPTIDW.R' => 0,
118
- },
119
- {
120
- # just KR
121
- 'K.EPTIDR.E' => 2,
122
- 'K.PEPTIDR.E' => 2,
123
- 'F.EEPTIDR.E' => 1,
124
- 'F.PEPTIDW.R' => 0,
125
- }
126
- ]
127
- scall = Sequest::PepXML::SearchHit
128
- sample_enzyme_ar = [@full_KRP, @just_KR]
129
- sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
130
- hash.each do |seq, val|
131
- sample_enzyme.num_tol_term(seq).should == val
132
- end
133
- end
134
- end
135
-
136
- it 'calculates number of missed cleavages' do
137
- exp = [{
138
- "EPTIDR" => 0,
139
- "PEPTIDR" => 0,
140
- "EEPTIDR" => 0,
141
- "PEPTIDW" => 0,
142
- "PERPTIDW" => 0,
143
- "PEPKPTIDW" => 0,
144
- "PEPKTIDW" => 1,
145
- "RTTIDR" => 1,
146
- "RTTIKK" => 2,
147
- "PKEPRTIDW" => 2,
148
- "PKEPRTIDKP" => 2,
149
- "PKEPRAALKPEERPTIDKW" => 3,
150
- },
151
- {
152
- "EPTIDR" => 0,
153
- "PEPTIDR" => 0,
154
- "EEPTIDR" => 0,
155
- "PEPTIDW" => 0,
156
- "PERPTIDW" => 1,
157
- "PEPKPTIDW" => 1,
158
- "PEPKTIDW" => 1,
159
- "RTTIDR" => 1,
160
- "RTTIKK" => 2,
161
- "PKEPRTIDW" => 2,
162
- "PKEPRTIDKP" => 3,
163
- "PKEPRAALKPEERPTIDKW" => 5,
164
- }
165
- ]
166
-
167
- sample_enzyme_ar = [@full_KRP, @just_KR]
168
- sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
169
- hash.each do |aaseq, val|
170
- #first, middle, last = SpecID::Pep.split_sequence(seq)
171
- # note that we are only using the middle section!
172
- sample_enzyme.num_missed_cleavages(aaseq).should == val
173
- end
174
- end
175
- end
176
176
 
177
177
  end
178
178
  =end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/ident/pepxml/search_hit/modification_info'
4
+
5
describe 'Ms::Ident::Pepxml::SearchHit::ModificationInfo' do

  # builds a ModificationInfo from a hash: an n-terminal mass, a modified
  # peptide containing an xml special char ('&'), and two modified residues
  before do
    mass_class = Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass
    mod_masses = [[3, 150.3], [6, 345.2]].map {|position, mass| mass_class.new(position, mass) }
    #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
    @obj = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(
      :mod_nterm_mass => 520.2,
      :modified_peptide => "MOD*IFI^E&D",
      :mod_aminoacid_masses => mod_masses
    )
  end

  it 'can produce valid pepxml xml' do
    xml = @obj.to_xml
    # each fragment is matched literally, independent of attribute order
    expected_fragments = [
      '<modification_info',
      ' mod_nterm_mass="520.2"',
      ' modified_peptide="MOD*IFI^E&amp;D"',
      '<mod_aminoacid_mass',
      ' position="3"',
      ' mass="150.3"',
      ' position="6"',
      ' mass="345.2"',
      '</modification_info>',
    ]
    expected_fragments.each do |fragment|
      xml.matches Regexp.new(Regexp.escape(fragment))
    end
  end
end
36
+
37
+
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 17
9
+ version: 0.0.17
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-28 00:00:00 -07:00
17
+ date: 2011-03-08 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -135,9 +135,9 @@ files:
135
135
  - VERSION
136
136
  - lib/merge.rb
137
137
  - lib/ms/ident.rb
138
+ - lib/ms/ident/peptide.rb
138
139
  - lib/ms/ident/pepxml.rb
139
140
  - lib/ms/ident/pepxml/modifications.rb
140
- - lib/ms/ident/pepxml/modifications/sequest.rb
141
141
  - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
142
142
  - lib/ms/ident/pepxml/msms_run_summary.rb
143
143
  - lib/ms/ident/pepxml/parameters.rb
@@ -150,9 +150,12 @@ files:
150
150
  - lib/ms/ident/pepxml/search_result.rb
151
151
  - lib/ms/ident/pepxml/search_summary.rb
152
152
  - lib/ms/ident/pepxml/spectrum_query.rb
153
+ - lib/ms/ident/protein.rb
154
+ - lib/ms/ident/search.rb
153
155
  - schema/pepXML_v115.xsd
154
156
  - schema/pepXML_v19.xsd
155
157
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb
158
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
156
159
  - spec/ms/ident/pepxml_spec.rb
157
160
  - spec/spec_helper.rb
158
161
  has_rdoc: true
@@ -169,7 +172,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
169
172
  requirements:
170
173
  - - ">="
171
174
  - !ruby/object:Gem::Version
172
- hash: 3918611084548908133
175
+ hash: -1969914373934932629
173
176
  segments:
174
177
  - 0
175
178
  version: "0"
@@ -190,5 +193,6 @@ specification_version: 3
190
193
  summary: mspire library for working with mzIdentML and pepxml
191
194
  test_files:
192
195
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb
196
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
193
197
  - spec/ms/ident/pepxml_spec.rb
194
198
  - spec/spec_helper.rb
@@ -1,237 +0,0 @@
1
- require 'ms/ident/pepxml/modifications'
2
- require 'ms/ident/pepxml/search_hit/modification_info'
3
-
4
- module Ms ; end
5
- module Ms::Ident ; end
6
- class Ms::Ident::Pepxml ; end
7
-
8
- module Ms::Ident::Pepxml::Modifications
9
- # Handles modifications for sequest style searches
10
- class Sequest
11
- include Ms::Ident::Pepxml::Modifications
12
-
13
- # a hash of all differential modifications present by aa_one_letter_symbol
14
- # and special_symbol. This is NOT the mass difference but the total mass {
15
- # 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
16
- # the amino acid sequence, they are give the *differential* mass. The
17
- # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
18
- # cterminus ']' => 14.55 # nterminus
19
- attr_accessor :masses_by_diff_mod_hash
20
- # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
21
- # values are the special_symbols
22
- attr_accessor :mod_symbols_hash
23
-
24
- # sequest params object
25
- attr_accessor :params
26
-
27
-
28
- # The modification symbols string looks like this:
29
- # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
30
- # ct is cterminal peptide (differential)
31
- # nt is nterminal peptide (differential)
32
- # the C is just cysteine
33
- # will set_modifications and masses_by_diff_mod hash
34
- def initialize(params=nil, modification_symbols_string='')
35
- @params = params
36
- if @params
37
- set_modifications(params, modification_symbols_string)
38
- end
39
- end
40
-
41
- # set the masses_by_diff_mod and mod_symbols_hash from
42
- def set_hashes(modification_symbols_string)
43
-
44
- @mod_symbols_hash = {}
45
- @masses_by_diff_mod = {}
46
- if (modification_symbols_string == nil || modification_symbols_string == '')
47
- return nil
48
- end
49
- table = @params.mass_table
50
- modification_symbols_string.split(/\)\s+\(/).each do |mod|
51
- if md = mod.match(/\(?(\w+)(.) (.[\d\.]+)\)?/)
52
- if md[1] == 'ct' || md[1] == 'nt'
53
- mass_diff = md[3].to_f
54
- @masses_by_diff_mod[md[2]] = mass_diff
55
- @mod_symbols_hash[[md[1].to_sym, mass_diff]] = md[2].dup
56
- else
57
- symbol_string = md[2].dup
58
- mass_diff = md[3].to_f
59
- md[1].split('').each do |aa|
60
- aa_as_sym = aa.to_sym
61
- @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
62
- @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
63
- end
64
- end
65
- end
66
- end
67
- end
68
-
69
- # given a bare peptide (no end pieces) returns a ModificationInfo object
70
- # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
71
- # if there are no modifications, returns nil
72
- def modification_info(peptide)
73
- if @masses_by_diff_mod.size == 0
74
- return nil
75
- end
76
- hash = {}
77
- hash[:modified_peptide] = peptide.dup
78
- hsh = @masses_by_diff_mod
79
- table = @params.mass_table
80
- h = table[:h] # this? or h_plus ??
81
- oh = table[:o] + h
82
- ## only the termini can match a single char
83
- if hsh.key? peptide[0,1]
84
- # AA + H + differential_mod
85
- hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
86
- peptide = peptide[1...(peptide.size)]
87
- end
88
- if hsh.key? peptide[(peptide.size-1),1]
89
- # AA + OH + differential_mod
90
- hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
91
- peptide.slice!( 0..-2 )
92
- peptide = peptide[0...(peptide.size-1)]
93
- end
94
- mod_array = []
95
- (0...peptide.size).each do |i|
96
- if hsh.key? peptide[i,2]
97
- mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
98
- end
99
- end
100
- if mod_array.size > 0
101
- hash[:mod_aminoacid_masses] = mod_array
102
- end
103
- if hash.size > 1 # if there is more than just the modified peptide there
104
- Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
105
- #Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
106
- else
107
- nil
108
- end
109
- end
110
-
111
- # returns an array of static mod objects and static terminal mod objects
112
- def create_static_mods(params)
113
-
114
- ####################################
115
- ## static mods
116
- ####################################
117
-
118
- static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
119
- static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
120
-
121
- params.mods.each do |k,v|
122
- v_to_f = v.to_f
123
- if v_to_f != 0.0
124
- if k =~ /add_(\w)_/
125
- static_mods << [$1.to_sym, v_to_f]
126
- else
127
- static_terminal_mods << [k, v_to_f]
128
- end
129
- end
130
- end
131
- aa_hash = params.mass_table
132
-
133
- ## Create the static_mods objects
134
- static_mods.map! do |mod|
135
- hash = {
136
- :aminoacid => mod[0].to_s,
137
- :massdiff => mod[1],
138
- :mass => aa_hash[mod[0]] + mod[1],
139
- :variable => 'N',
140
- :binary => 'Y',
141
- }
142
- Ms::Ident::Pepxml::AminoacidModification.new(hash)
143
- end
144
-
145
- ## Create the static_terminal_mods objects
146
- static_terminal_mods.map! do |mod|
147
- terminus = if mod[0] =~ /Cterm/ ; 'c'
148
- else ; 'n' # only two possible termini
149
- end
150
- protein_terminus = case mod[0]
151
- when /Nterm_protein/ ; 'n'
152
- when /Cterm_protein/ ; 'c'
153
- else nil
154
- end
155
-
156
- # create the hash
157
- hash = {
158
- :terminus => terminus,
159
- :massdiff => mod[1],
160
- :variable => 'N',
161
- :description => mod[0],
162
- }
163
- hash[:protein_terminus] = protein_terminus if protein_terminus
164
- Ms::Ident::Pepxml::TerminalModification.new(hash)
165
- end
166
- [static_mods, static_terminal_mods]
167
- end
168
-
169
- # 1. sets aminoacid_modifications and terminal_modifications from a sequest params object
170
- # 2. sets @params
171
- # 3. sets @masses_by_diff_mod
172
- def set_modifications(params, modification_symbols_string)
173
- @params = params
174
-
175
- set_hashes(modification_symbols_string)
176
- (static_mods, static_terminal_mods) = create_static_mods(params)
177
-
178
- aa_hash = params.mass_table
179
- #################################
180
- # Variable Mods:
181
- #################################
182
- arr = params.diff_search_options.rstrip.split(/\s+/)
183
- # [aa.to_sym, diff.to_f]
184
- variable_mods = []
185
- (0...arr.size).step(2) do |i|
186
- if arr[i].to_f != 0.0
187
- variable_mods << [arr[i+1], arr[i].to_f]
188
- end
189
- end
190
- mod_objects = []
191
- variable_mods.each do |mod|
192
- mod[0].split('').each do |aa|
193
- hash = {
194
-
195
- :aminoacid => aa,
196
- :massdiff => mod[1],
197
- :mass => aa_hash[aa.to_sym] + mod[1],
198
- :variable => 'Y',
199
- :binary => 'N',
200
- :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
201
- }
202
- mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
203
- end
204
- end
205
- variable_mods = mod_objects
206
- #################################
207
- # TERMINAL Variable Mods:
208
- #################################
209
- # These are always peptide, not protein termini (for sequest)
210
- (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
211
-
212
- to_add = []
213
- if nterm_diff != 0.0
214
- to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
215
- end
216
- if cterm_diff != 0.0
217
- to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
218
- end
219
-
220
- variable_terminal_mods = to_add.map do |term, mssdiff, symb|
221
- hash = {
222
- :terminus => term,
223
- :massdiff => mssdiff,
224
- :variable => 'Y',
225
- :symbol => symb,
226
- }
227
- Ms::Ident::Pepxml::TerminalModification.new(hash)
228
- end
229
-
230
- #########################
231
- # COLLECT THEM
232
- #########################
233
- @aminoacid_modifications = static_mods + variable_mods
234
- @terminal_modifications = static_terminal_mods + variable_terminal_mods
235
- end
236
- end
237
-