ms-ident 0.0.3 → 0.0.17

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.0.17
@@ -0,0 +1,75 @@
1
+
2
+ module Ms ; end
3
+ module Ms::Ident ; end
4
+
5
+ # A 'sequence' is a notation of a peptide that includes the leading and
6
+ # trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
7
+ # and may contain post-translational modification information.
8
+ #
9
+ # 'aaseq' is the amino acid sequence of just the peptide with no leading or
10
+ # trailing notation (e.g., PEPTIDER or LAKKLY)
11
+ module Ms::Ident::Peptide
12
+ Nonstandard_AA_re = /[^A-Z\.\-]/
13
+
14
+ class << self
15
+
16
+ # Takes a peptide sequence of the form '-.PEPTIDE.R', removes non-standard
17
+ # amino acids, and returns the center piece
18
+ def sequence_to_aaseq(sequence)
19
+ after_removed = remove_non_amino_acids(sequence)
20
+ pieces = after_removed.split('.')
21
+ case pieces.size
22
+ when 3
23
+ pieces[1]
24
+ when 2
25
+ if pieces[0].size > 1 ## N termini
26
+ pieces[0]
27
+ else ## C termini
28
+ pieces[1]
29
+ end
30
+ when 1 ## this must be a parse error!
31
+ pieces[0] ## which is the peptide itself
32
+ else
33
+ abort "bad peptide sequence: #{sequence.inspect}"
34
+ end
35
+ end
36
+
37
+ # removes non standard amino acids specified by Nonstandard_AA_re
38
+ def remove_non_amino_acids(sequence)
39
+ sequence.gsub(Nonstandard_AA_re, '')
40
+ end
41
+
42
+ # remove non amino acids and split the sequence
43
+ def prepare_sequence(sequence)
44
+ nv = remove_non_amino_acids(sequence)
45
+ split_sequence(nv)
46
+ end
47
+
48
+ # Returns prev, peptide, next from sequence. Parse errors return
49
+ # nil,nil,nil
50
+ # R.PEPTIDE.A # -> R, PEPTIDE, A
51
+ # R.PEPTIDE.- # -> R, PEPTIDE, -
52
+ # PEPTIDE.A # -> -, PEPTIDE, A
53
+ # A.PEPTIDE # -> A, PEPTIDE, -
54
+ # PEPTIDE # -> nil,nil,nil
55
+ def split_sequence(sequence)
56
+ pieces = sequence.split('.')
57
+ case pieces.size
58
+ when 3
59
+ pieces
60
+ when 2
61
+ if pieces[0].size > 1 ## N termini
62
+ ['-', pieces[0], pieces[1]]
63
+ else ## C termini
64
+ [pieces[0], pieces[1], '-']
65
+ end
66
+ when 1 ## this must be a parse error!
67
+ [nil,nil,nil]
68
+ when 0
69
+ [nil,nil,nil]
70
+ end
71
+ end
72
+ end
73
+
74
+
75
+ end
@@ -5,17 +5,6 @@ module Ms ; end
5
5
  module Ms::Ident ; end
6
6
  class Ms::Ident::Pepxml ; end
7
7
 
8
- # holds a list of AminoacidModification and TerminalModification objects.
9
- class Ms::Ident::Pepxml::Modifications < Array
10
- ## Generates the pepxml for static and differential amino acid mods based on
11
- ## sequest object
12
- def to_xml(builder=nil)
13
- xmlb = builder || Nokogiri::XML::Builder.new
14
- self.each {|mod| mod.to_xml(xmlb) }
15
- builder || xmlb.doc.root.to_xml
16
- end
17
- end
18
-
19
8
  # Modified aminoacid, static or variable
20
9
  # unless otherwise stated, all attributes can be anything
21
10
  class Ms::Ident::Pepxml::AminoacidModification
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
 
4
4
  require 'ms/ident/pepxml/sample_enzyme'
5
5
  require 'ms/ident/pepxml/search_summary'
6
+ require 'ms/ident/pepxml/spectrum_query'
6
7
 
7
8
  module Ms ; end
8
9
  module Ms::Ident ; end
@@ -1,4 +1,6 @@
1
1
  require 'merge'
2
+ require 'strscan'
3
+
2
4
  module Ms ; end
3
5
  module Ms::Ident ; end
4
6
  class Ms::Ident::Pepxml ; end
@@ -56,21 +58,11 @@ class Ms::Ident::Pepxml::SampleEnzyme
56
58
  def self.from_pepxml_node(node)
57
59
  self.new.from_pepxml_node(node)
58
60
  end
59
- end
60
-
61
- ###################################################
62
- ###################################################
63
- ###################################################
64
- ###################################################
65
- # This is digestion methodology:
66
-
67
- =begin
68
61
 
69
- require 'strscan'
70
-
71
- # takes an amino acid sequence (e.g., -.PEPTIDK.L)
62
+ # takes an amino acid sequence (e.g. PEPTIDE).
72
63
  # returns the number of missed cleavages
73
64
  def num_missed_cleavages(aaseq)
65
+ seq_to_scan = ' ' + aaseq + ' '
74
66
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
75
67
  @num_missed_cleavages_regex =
76
68
  if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
@@ -89,23 +81,30 @@ require 'strscan'
89
81
  num
90
82
  end
91
83
 
92
- # requires full sequence (with heads and tails)
93
- def num_tol_term(sequence)
84
+ # None of the arguments may contain non-standard amino acids
85
+ def num_tol_term(prev_aa, middle, next_aa)
94
86
  raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
95
87
  no_cut = @no_cut || ''
96
88
  num_tol = 0
97
- first, middle, last = SpecID::Pep.split_sequence(sequence)
98
89
  last_of_middle = middle[-1,1]
99
90
  first_of_middle = middle[0,1]
100
- if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
91
+ if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
101
92
  num_tol += 1
102
93
  end
103
- if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
94
+ if @cut.include?(last_of_middle) && !no_cut.include?(next_aa) || next_aa == '-'
104
95
  num_tol += 1
105
96
  end
106
97
  num_tol
107
98
  end
99
+ end
100
+
101
+ ###################################################
102
+ ###################################################
103
+ ###################################################
104
+ ###################################################
105
+ # This is digestion methodology:
108
106
 
107
+ =begin
109
108
  # returns all peptides of missed cleavages <= 'missed_cleavages'
110
109
  # so 2 missed cleavages will return all no missed cleavage peptides
111
110
  # all 1 missed cleavages and all 2 missed cleavages.
@@ -1,3 +1,4 @@
1
+ require 'ms/fasta'
1
2
  require 'merge'
2
3
  module Ms ; end
3
4
  module Ms::Ident ; end
@@ -25,13 +26,19 @@ class Ms::Ident::Pepxml
25
26
  def initialize(hash={}, get_size_of_residues=false)
26
27
  merge!(hash)
27
28
  if get_size_of_residues && File.exist?(@local_path)
28
- @size_of_residues = 0
29
- Ms::Fasta.foreach(@local_path) do |entry|
30
- @size_of_residues += entry.sequence.size
31
- end
29
+ set_size_of_residues!
32
30
  end
33
31
  end
34
32
 
33
+ # returns self for chaining
34
+ def set_size_of_residues!
35
+ @size_of_residues = 0
36
+ Ms::Fasta.foreach(@local_path) do |entry|
37
+ @size_of_residues += entry.sequence.size
38
+ end
39
+ self
40
+ end
41
+
35
42
  def to_xml(builder)
36
43
  attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
37
44
  builder.search_database(Hash[attrs])
@@ -31,16 +31,11 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
31
31
  # position ranges from 1 to peptide length
32
32
  #attr_accessor :mod_aminoacid_masses
33
33
 
34
- class << self
35
- alias_method :old_new, :new
36
- # takes either a hash or the normal list of values to set.
37
- def new(*args)
38
- if args.first.is_a?(Hash)
39
- args = args.first.values_at(*members)
40
- end
41
- obj = old_new(*args)
42
- obj
34
+ def initialize(*args)
35
+ if args.first.is_a?(Hash)
36
+ args = args.first.values_at(*members)
43
37
  end
38
+ super(*args)
44
39
  end
45
40
 
46
41
  # Will escape any xml special chars in modified_peptide
@@ -50,11 +45,12 @@ Ms::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :
50
45
  ## Create the attribute string:
51
46
  atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
52
47
  atts.map! {|at| (v=send(at)) && [at, v] }.compact
53
- xmlb.modification_info(Hash[atts]) do
48
+ xmlb.modification_info(Hash[atts]) do |xmlb|
54
49
  mod_aminoacid_masses.andand.each do |mod_aa_mass|
55
50
  mod_aa_mass.to_xml(xmlb)
56
51
  end
57
52
  end
53
+ builder || xmlb.doc.root.to_s
58
54
  end
59
55
 
60
56
  def self.from_pepxml_node(node)
@@ -78,5 +74,6 @@ end
78
74
  Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
79
75
  def to_xml(builder)
80
76
  builder.mod_aminoacid_mass(:position => position, :mass => mass)
77
+ builder
81
78
  end
82
79
  end
@@ -1,5 +1,7 @@
1
1
  require 'nokogiri'
2
2
 
3
+ require 'ms/ident/pepxml/search_hit'
4
+
3
5
  module Ms ; end
4
6
  module Ms::Ident ; end
5
7
  class Ms::Ident::Pepxml ; end
@@ -8,9 +10,14 @@ class Ms::Ident::Pepxml::SearchResult
8
10
  # an array of search_hits
9
11
  attr_accessor :search_hits
10
12
 
11
- # if block given, then yields an empty search_hits array
13
+ # if block given, then yields an empty search_hits array.
14
+ # For consistency with other objects, will also take a hash that has the key
15
+ # :search_hits and the value an array.
12
16
  def initialize(search_hits = [], &block)
13
17
  @search_hits = search_hits
18
+ if search_hits.is_a?(Hash)
19
+ @search_hits = search_hits[:search_hits]
20
+ end
14
21
  block.call(@search_hits) if block
15
22
  end
16
23
 
@@ -30,7 +30,7 @@ class Ms::Ident::Pepxml::SearchSummary
30
30
  attr_accessor :out_data
31
31
  # by default, "1"
32
32
  attr_accessor :search_id
33
- # a Modifications object
33
+ # an array of Ms::Ident::Pepxml::Modification objects
34
34
  attr_accessor :modifications
35
35
  # A SearchDatabase object (responds to :local_path and :type)
36
36
  attr_accessor :search_database
@@ -49,12 +49,14 @@ class Ms::Ident::Pepxml::SearchSummary
49
49
  def block_arg
50
50
  [@search_database = Ms::Ident::Pepxml::SearchDatabase.new,
51
51
  @enzymatic_search_constraint = Ms::Ident::Pepxml::EnzymaticSearchConstraint.new,
52
- @modifications = Ms::Ident::Pepxml::Modifications.new,
52
+ @modifications,
53
53
  @parameters = Ms::Ident::Pepxml::Parameters.new,
54
54
  ]
55
55
  end
56
56
 
57
+ # initializes modifications to an empty array
57
58
  def initialize(hash={}, &block)
59
+ @modifications = []
58
60
  @search_id = DEFAULT_SEARCH_ID
59
61
  merge!(hash, &block)
60
62
  end
@@ -68,7 +70,9 @@ class Ms::Ident::Pepxml::SearchSummary
68
70
  builder.search_summary(hash) do |xmlb|
69
71
  search_database.to_xml(xmlb)
70
72
  xmlb.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
71
- modifications.to_xml(xmlb) if modifications
73
+ modifications.each do |mod|
74
+ mod.to_xml(xmlb)
75
+ end
72
76
  parameters.to_xml(xmlb) if parameters
73
77
  end
74
78
  builder || xmlb.doc.root.to_xml
@@ -2,6 +2,8 @@ require 'nokogiri'
2
2
  require 'ms/mass'
3
3
  require 'merge'
4
4
 
5
+ require 'ms/ident/pepxml/search_result'
6
+
5
7
  module Ms ; end
6
8
  module Ms::Ident ; end
7
9
  class Ms::Ident::Pepxml ; end
@@ -43,19 +43,46 @@ class Ms::Ident::Pepxml
43
43
  doc
44
44
  end
45
45
 
46
- # writes xml file named msms_pipeline_analysis.summary_xml into the msms_run_summary.base_name directory
47
- def to_xml_file
48
- to_xml(File.dirname(msms_pipeline_analysis.msms_run_summary.base_name) + '/' + msms_pipeline_analysis.summary_xml)
49
- end
46
+ # if no options are given, an xml string is returned. If either :outdir or
47
+ # :outfile is given, the xml is written to file and the output filename is returned.
48
+ # A single string argument will be interpreted as :outfile if it ends in
49
+ # '.xml' and as :outdir otherwise. In this case, :update_summary_xml still defaults to true.
50
+ #
51
+ # options:
52
+ #
53
+ # arg default
54
+ # :outdir => nil write to disk using this outdir with summary_xml basename
55
+ # :outfile => nil write to this filename (overrides outdir)
56
+ # :update_summary_xml => true update summary_xml attribute to point to the output file true/false
57
+ #
58
+ # set outdir to
59
+ # File.dirname(pepxml_obj.msms_pipeline_analysis.msms_run_summary.base_name)
60
+ # to write to the same directory as the input search file.
61
+ def to_xml(opts={})
62
+ opts ||= {}
63
+ if opts.is_a?(String)
64
+ opts = ( opts.match(/\.xml$/) ? {:outfile => opts} : {:outdir => opts } )
65
+ end
66
+ opt = {:update_summary_xml => true, :outdir => nil, :outfile => nil}.merge(opts)
67
+
68
+ if opt[:outfile]
69
+ outfile = opt[:outfile]
70
+ elsif opt[:outdir]
71
+ outfile = File.join(opt[:outdir], msms_pipeline_analysis.summary_xml.split(/[\/\\]/).last)
72
+ end
73
+ self.msms_pipeline_analysis.summary_xml = File.expand_path(outfile) if (opt[:update_summary_xml] && outfile)
50
74
 
51
- # if no outfile is given, an xml string is returned. summary_xml should
52
- # have already been set and is not influenced by the outfile given here.
53
- def to_xml(outfile=nil)
54
75
  builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
55
76
  msms_pipeline_analysis.to_xml(builder)
56
77
  add_stylesheet(builder.doc, Ms::Ident::Pepxml::XML_STYLESHEET_LOCATION)
57
78
  string = builder.doc.to_xml
58
- outfile ? File.open(outfile,'w') {|out| out.print(string) } : string
79
+
80
+ if outfile
81
+ File.open(outfile,'w') {|out| out.print(string) }
82
+ outfile
83
+ else
84
+ string
85
+ end
59
86
  end
60
87
  end
61
88
 
@@ -0,0 +1,17 @@
1
+
2
+ module Ms ; end
3
+ module Ms::Ident ; end
4
+
5
+ module Ms::Ident::Protein
6
+
7
+ class << self
8
+ end
9
+
10
+ # gives the part of the reference up to the first whitespace character (space, tab, newline or carriage return).
11
+ # Assumes the protein can respond_to? :reference
12
+ def first_entry
13
+ reference.split(/[\s\r]/)[0]
14
+ end
15
+
16
+ end
17
+
@@ -0,0 +1,105 @@
1
+
2
+ module Ms
3
+ module Ident
4
+
5
+ module Search
6
+ attr_accessor :proteins
7
+ attr_accessor :peptides
8
+
9
+ # returns an array of peptide_hits and protein_hits that are linked to
10
+ # one another. NOTE: this will update peptide and protein
11
+ # hits' :proteins and :peptides attributes respectively. Assumes that each search
12
+ # responds to :peptides, each peptide responds to :proteins and each protein to
13
+ # :peptides. Can be done on a single file to restore protein/peptide
14
+ # linkages to their original single-file state.
15
+ # Assumes the protein is initialized with (reference, peptide_ar)
16
+ #
17
+ # yields the protein that will become the template for a new protein
18
+ # and expects a new protein hit
19
+ def merge!(ar_of_peptide_hit_arrays)
20
+ all_peptide_hits = []
21
+ reference_hash = {}
22
+ ar_of_peptide_hit_arrays.each do |peptide_hits|
23
+ all_peptide_hits.push(*peptide_hits)
24
+ peptide_hits.each do |peptide|
25
+ peptide.proteins.each do |protein|
26
+ ref = protein.reference
27
+ if reference_hash.key? ref
28
+ reference_hash[ref].peptides << peptide
29
+ reference_hash[ref]
30
+ else
31
+ reference_hash[ref] = yield(protein, [peptide])
32
+ end
33
+ end
34
+ end
35
+ end
36
+ [all_peptide_hits, reference_hash.values]
37
+ end
38
+
39
+ end
40
+
41
+
42
+ module SearchGroup
43
+ include Search
44
+
45
+ # an array of search objects
46
+ attr_accessor :searches
47
+
48
+ # the group's file extension (with no leading period)
49
+ def extension
50
+ 'grp'
51
+ end
52
+
53
+ def search_class
54
+ Search
55
+ end
56
+
57
+ # a simple formatted file with paths to the search files
58
+ def to_paths(file)
59
+ IO.readlines(file).grep(/\w/).reject {|v| v =~ /^#/}.map {|v| v.chomp }
60
+ end
61
+
62
+ def from_file(file)
63
+ from_filenames(to_paths(file))
64
+ end
65
+
66
+
67
+ def from_filenames(filenames)
68
+ filenames.each do |file|
69
+ if !File.exist? file
70
+ message = "File: #{file} does not exist!\n"
71
+ message << "perhaps you need to modify the file with file paths"
72
+ abort message
73
+ end
74
+ @searches << search_class.new(file)
75
+ end
76
+ end
77
+
78
+
79
+ # takes an array of filenames or a single search filename (with
80
+ # extension defined by 'extension') or an array of objects. Passes any
81
+ # arguments to the initializer for each search
82
+ # the optional block yields the object for further processing
83
+ def initialize(arg=nil, opts={})
84
+ @peptides = []
85
+ @reference_hash = {}
86
+ @searches = []
87
+
88
+ if arg
89
+ if arg.is_a?(String) && arg =~ /\.#{Regexp.escap(extension)}$/
90
+ from_file(arg)
91
+ elsif arg.is_a?(Array) && arg.first.is_a?(String)
92
+ from_filenames(arg)
93
+ elsif arg.is_a?(Array)
94
+ @searches = array
95
+ else
96
+ raise ArgumentError, "must be file, array of filenames, or array of objs"
97
+ end
98
+ @searches << search_class.new(file, opts)
99
+ end
100
+ yield(self) if block_given?
101
+ end
102
+
103
+ end
104
+ end
105
+ end
@@ -48,6 +48,86 @@ describe 'an Ms::Ident::Pepxml::SampleEnzyme' do
48
48
  end
49
49
  end
50
50
 
51
+ describe 'an Ms::Ident::Pepxml::SampleEnzyme making enzyme digestion calculations' do
52
+ before do
53
+ @full_KRP = Ms::Ident::Pepxml::SampleEnzyme.new(
54
+ :name => 'trypsin',
55
+ :cut => 'KR',
56
+ :no_cut => 'P',
57
+ :sense => 'C',
58
+ )
59
+ @just_KR = Ms::Ident::Pepxml::SampleEnzyme.new(
60
+ :name => 'trypsin',
61
+ :cut => 'KR',
62
+ :no_cut => '',
63
+ :sense => 'C',
64
+ )
65
+ end
66
+
67
+ it 'calculates the number of tolerant termini' do
68
+ exp = [{
69
+ # full KR/P
70
+ %w(K EPTIDR E) => 2,
71
+ %w(K PEPTIDR E) => 1,
72
+ %w(F EEPTIDR E) => 1,
73
+ %w(F PEPTIDW R) => 0,
74
+ },
75
+ {
76
+ # just KR
77
+ %w(K EPTIDR E) => 2,
78
+ %w(K PEPTIDR E) => 2,
79
+ %w(F EEPTIDR E) => 1,
80
+ %w(F PEPTIDW R) => 0,
81
+ }
82
+ ]
83
+ sample_enzyme_ar = [@full_KRP, @just_KR]
84
+ sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
85
+ hash.each do |seq, val|
86
+ sample_enzyme.num_tol_term(*seq).should == val
87
+ end
88
+ end
89
+ end
90
+
91
+ it 'calculates number of missed cleavages' do
92
+ exp = [{
93
+ "EPTIDR" => 0,
94
+ "PEPTIDR" => 0,
95
+ "EEPTIDR" => 0,
96
+ "PEPTIDW" => 0,
97
+ "PERPTIDW" => 0,
98
+ "PEPKPTIDW" => 0,
99
+ "PEPKTIDW" => 1,
100
+ "RTTIDR" => 1,
101
+ "RTTIKK" => 2,
102
+ "PKEPRTIDW" => 2,
103
+ "PKEPRTIDKP" => 2,
104
+ "PKEPRAALKPEERPTIDKW" => 3,
105
+ },
106
+ {
107
+ "EPTIDR" => 0,
108
+ "PEPTIDR" => 0,
109
+ "EEPTIDR" => 0,
110
+ "PEPTIDW" => 0,
111
+ "PERPTIDW" => 1,
112
+ "PEPKPTIDW" => 1,
113
+ "PEPKTIDW" => 1,
114
+ "RTTIDR" => 1,
115
+ "RTTIKK" => 2,
116
+ "PKEPRTIDW" => 2,
117
+ "PKEPRTIDKP" => 3,
118
+ "PKEPRAALKPEERPTIDKW" => 5,
119
+ }
120
+ ]
121
+
122
+ sample_enzyme_ar = [@full_KRP, @just_KR]
123
+ sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
124
+ hash.each do |aaseq, val|
125
+ sample_enzyme.num_missed_cleavages(aaseq).should == val
126
+ end
127
+ end
128
+ end
129
+ end
130
+
51
131
  xdescribe 'read in from an xml node' do
52
132
  # placeholder until written
53
133
  end
@@ -93,86 +173,6 @@ end
93
173
 
94
174
  describe SampleEnzyme, 'making enzyme calculations on sequences and aaseqs' do
95
175
 
96
- before(:each) do
97
- @full_KRP = SampleEnzyme.new do |se|
98
- se.name = 'trypsin'
99
- se.cut = 'KR'
100
- se.no_cut = 'P'
101
- se.sense = 'C'
102
- end
103
- @just_KR = SampleEnzyme.new do |se|
104
- se.name = 'trypsin'
105
- se.cut = 'KR'
106
- se.no_cut = ''
107
- se.sense = 'C'
108
- end
109
- end
110
-
111
- it 'calculates the number of tolerant termini' do
112
- exp = [{
113
- # full KR/P
114
- 'K.EPTIDR.E' => 2,
115
- 'K.PEPTIDR.E' => 1,
116
- 'F.EEPTIDR.E' => 1,
117
- 'F.PEPTIDW.R' => 0,
118
- },
119
- {
120
- # just KR
121
- 'K.EPTIDR.E' => 2,
122
- 'K.PEPTIDR.E' => 2,
123
- 'F.EEPTIDR.E' => 1,
124
- 'F.PEPTIDW.R' => 0,
125
- }
126
- ]
127
- scall = Sequest::PepXML::SearchHit
128
- sample_enzyme_ar = [@full_KRP, @just_KR]
129
- sample_enzyme_ar.zip(exp) do |sample_enzyme,hash|
130
- hash.each do |seq, val|
131
- sample_enzyme.num_tol_term(seq).should == val
132
- end
133
- end
134
- end
135
-
136
- it 'calculates number of missed cleavages' do
137
- exp = [{
138
- "EPTIDR" => 0,
139
- "PEPTIDR" => 0,
140
- "EEPTIDR" => 0,
141
- "PEPTIDW" => 0,
142
- "PERPTIDW" => 0,
143
- "PEPKPTIDW" => 0,
144
- "PEPKTIDW" => 1,
145
- "RTTIDR" => 1,
146
- "RTTIKK" => 2,
147
- "PKEPRTIDW" => 2,
148
- "PKEPRTIDKP" => 2,
149
- "PKEPRAALKPEERPTIDKW" => 3,
150
- },
151
- {
152
- "EPTIDR" => 0,
153
- "PEPTIDR" => 0,
154
- "EEPTIDR" => 0,
155
- "PEPTIDW" => 0,
156
- "PERPTIDW" => 1,
157
- "PEPKPTIDW" => 1,
158
- "PEPKTIDW" => 1,
159
- "RTTIDR" => 1,
160
- "RTTIKK" => 2,
161
- "PKEPRTIDW" => 2,
162
- "PKEPRTIDKP" => 3,
163
- "PKEPRAALKPEERPTIDKW" => 5,
164
- }
165
- ]
166
-
167
- sample_enzyme_ar = [@full_KRP, @just_KR]
168
- sample_enzyme_ar.zip(exp) do |sample_enzyme, hash|
169
- hash.each do |aaseq, val|
170
- #first, middle, last = SpecID::Pep.split_sequence(seq)
171
- # note that we are only using the middle section!
172
- sample_enzyme.num_missed_cleavages(aaseq).should == val
173
- end
174
- end
175
- end
176
176
 
177
177
  end
178
178
  =end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/ident/pepxml/search_hit/modification_info'
4
+
5
+ describe 'Ms::Ident::Pepxml::SearchHit::ModificationInfo' do
6
+
7
+ before do
8
+ modaaobjs = [[3, 150.3], [6, 345.2]].map do |ar|
9
+ Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*ar)
10
+ end
11
+ hash = {
12
+ :mod_nterm_mass => 520.2,
13
+ :modified_peptide => "MOD*IFI^E&D",
14
+ :mod_aminoacid_masses => modaaobjs,
15
+ }
16
+ #answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
17
+ @obj = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
18
+ end
19
+
20
+ it 'can produce valid pepxml xml' do
21
+ to_match = ['<modification_info',
22
+ ' mod_nterm_mass="520.2"',
23
+ " modified_peptide=\"MOD*IFI^E&amp;D\"",
24
+ "<mod_aminoacid_mass",
25
+ " position=\"3\"",
26
+ " mass=\"150.3\"",
27
+ " position=\"6\"",
28
+ " mass=\"345.2\"",
29
+ "</modification_info>"]
30
+ string = @obj.to_xml
31
+ to_match.each do |re|
32
+ string.matches Regexp.new(Regexp.escape(re))
33
+ end
34
+ end
35
+ end
36
+
37
+
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 17
9
+ version: 0.0.17
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-28 00:00:00 -07:00
17
+ date: 2011-03-08 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -135,9 +135,9 @@ files:
135
135
  - VERSION
136
136
  - lib/merge.rb
137
137
  - lib/ms/ident.rb
138
+ - lib/ms/ident/peptide.rb
138
139
  - lib/ms/ident/pepxml.rb
139
140
  - lib/ms/ident/pepxml/modifications.rb
140
- - lib/ms/ident/pepxml/modifications/sequest.rb
141
141
  - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
142
142
  - lib/ms/ident/pepxml/msms_run_summary.rb
143
143
  - lib/ms/ident/pepxml/parameters.rb
@@ -150,9 +150,12 @@ files:
150
150
  - lib/ms/ident/pepxml/search_result.rb
151
151
  - lib/ms/ident/pepxml/search_summary.rb
152
152
  - lib/ms/ident/pepxml/spectrum_query.rb
153
+ - lib/ms/ident/protein.rb
154
+ - lib/ms/ident/search.rb
153
155
  - schema/pepXML_v115.xsd
154
156
  - schema/pepXML_v19.xsd
155
157
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb
158
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
156
159
  - spec/ms/ident/pepxml_spec.rb
157
160
  - spec/spec_helper.rb
158
161
  has_rdoc: true
@@ -169,7 +172,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
169
172
  requirements:
170
173
  - - ">="
171
174
  - !ruby/object:Gem::Version
172
- hash: 3918611084548908133
175
+ hash: -1969914373934932629
173
176
  segments:
174
177
  - 0
175
178
  version: "0"
@@ -190,5 +193,6 @@ specification_version: 3
190
193
  summary: mspire library for working with mzIdentML and pepxml
191
194
  test_files:
192
195
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb
196
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
193
197
  - spec/ms/ident/pepxml_spec.rb
194
198
  - spec/spec_helper.rb
@@ -1,237 +0,0 @@
1
- require 'ms/ident/pepxml/modifications'
2
- require 'ms/ident/pepxml/search_hit/modification_info'
3
-
4
- module Ms ; end
5
- module Ms::Ident ; end
6
- class Ms::Ident::Pepxml ; end
7
-
8
- module Ms::Ident::Pepxml::Modifications
9
- # Handles modifications for sequest style searches
10
- class Sequest
11
- include Ms::Ident::Pepxml::Modifications
12
-
13
- # a hash of all differential modifications present by aa_one_letter_symbol
14
- # and special_symbol. This is NOT the mass difference but the total mass {
15
- # 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
16
- # the amino acid sequence, they are give the *differential* mass. The
17
- # termini are given the special symbol as in sequest e.g. '[' => 12.22, #
18
- # cterminus ']' => 14.55 # nterminus
19
- attr_accessor :masses_by_diff_mod_hash
20
- # a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
21
- # values are the special_symbols
22
- attr_accessor :mod_symbols_hash
23
-
24
- # sequest params object
25
- attr_accessor :params
26
-
27
-
28
- # The modification symbols string looks like this:
29
- # (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
30
- # ct is cterminal peptide (differential)
31
- # nt is nterminal peptide (differential)
32
- # the C is just cysteine
33
- # will set_modifications and masses_by_diff_mod hash
34
- def initialize(params=nil, modification_symbols_string='')
35
- @params = params
36
- if @params
37
- set_modifications(params, modification_symbols_string)
38
- end
39
- end
40
-
41
- # set the masses_by_diff_mod and mod_symbols_hash from
42
- def set_hashes(modification_symbols_string)
43
-
44
- @mod_symbols_hash = {}
45
- @masses_by_diff_mod = {}
46
- if (modification_symbols_string == nil || modification_symbols_string == '')
47
- return nil
48
- end
49
- table = @params.mass_table
50
- modification_symbols_string.split(/\)\s+\(/).each do |mod|
51
- if md = mod.match(/\(?(\w+)(.) (.[\d\.]+)\)?/)
52
- if md[1] == 'ct' || md[1] == 'nt'
53
- mass_diff = md[3].to_f
54
- @masses_by_diff_mod[md[2]] = mass_diff
55
- @mod_symbols_hash[[md[1].to_sym, mass_diff]] = md[2].dup
56
- else
57
- symbol_string = md[2].dup
58
- mass_diff = md[3].to_f
59
- md[1].split('').each do |aa|
60
- aa_as_sym = aa.to_sym
61
- @masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
62
- @mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
63
- end
64
- end
65
- end
66
- end
67
- end
68
-
69
- # given a bare peptide (no end pieces) returns a ModificationInfo object
70
- # e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
71
- # if there are no modifications, returns nil
72
- def modification_info(peptide)
73
- if @masses_by_diff_mod.size == 0
74
- return nil
75
- end
76
- hash = {}
77
- hash[:modified_peptide] = peptide.dup
78
- hsh = @masses_by_diff_mod
79
- table = @params.mass_table
80
- h = table[:h] # this? or h_plus ??
81
- oh = table[:o] + h
82
- ## only the termini can match a single char
83
- if hsh.key? peptide[0,1]
84
- # AA + H + differential_mod
85
- hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
86
- peptide = peptide[1...(peptide.size)]
87
- end
88
- if hsh.key? peptide[(peptide.size-1),1]
89
- # AA + OH + differential_mod
90
- hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
91
- peptide.slice!( 0..-2 )
92
- peptide = peptide[0...(peptide.size-1)]
93
- end
94
- mod_array = []
95
- (0...peptide.size).each do |i|
96
- if hsh.key? peptide[i,2]
97
- mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
98
- end
99
- end
100
- if mod_array.size > 0
101
- hash[:mod_aminoacid_masses] = mod_array
102
- end
103
- if hash.size > 1 # if there is more than just the modified peptide there
104
- Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
105
- #Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
106
- else
107
- nil
108
- end
109
- end
110
-
111
- # returns an array of static mod objects and static terminal mod objects
112
- def create_static_mods(params)
113
-
114
- ####################################
115
- ## static mods
116
- ####################################
117
-
118
- static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
119
- static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
120
-
121
- params.mods.each do |k,v|
122
- v_to_f = v.to_f
123
- if v_to_f != 0.0
124
- if k =~ /add_(\w)_/
125
- static_mods << [$1.to_sym, v_to_f]
126
- else
127
- static_terminal_mods << [k, v_to_f]
128
- end
129
- end
130
- end
131
- aa_hash = params.mass_table
132
-
133
- ## Create the static_mods objects
134
- static_mods.map! do |mod|
135
- hash = {
136
- :aminoacid => mod[0].to_s,
137
- :massdiff => mod[1],
138
- :mass => aa_hash[mod[0]] + mod[1],
139
- :variable => 'N',
140
- :binary => 'Y',
141
- }
142
- Ms::Ident::Pepxml::AminoacidModification.new(hash)
143
- end
144
-
145
- ## Create the static_terminal_mods objects
146
- static_terminal_mods.map! do |mod|
147
- terminus = if mod[0] =~ /Cterm/ ; 'c'
148
- else ; 'n' # only two possible termini
149
- end
150
- protein_terminus = case mod[0]
151
- when /Nterm_protein/ ; 'n'
152
- when /Cterm_protein/ ; 'c'
153
- else nil
154
- end
155
-
156
- # create the hash
157
- hash = {
158
- :terminus => terminus,
159
- :massdiff => mod[1],
160
- :variable => 'N',
161
- :description => mod[0],
162
- }
163
- hash[:protein_terminus] = protein_terminus if protein_terminus
164
- Ms::Ident::Pepxml::TerminalModification.new(hash)
165
- end
166
- [static_mods, static_terminal_mods]
167
- end
168
-
169
- # 1. sets aminoacid_modifications and terminal_modifications from a sequest params object
170
- # 2. sets @params
171
- # 3. sets @masses_by_diff_mod
172
- def set_modifications(params, modification_symbols_string)
173
- @params = params
174
-
175
- set_hashes(modification_symbols_string)
176
- (static_mods, static_terminal_mods) = create_static_mods(params)
177
-
178
- aa_hash = params.mass_table
179
- #################################
180
- # Variable Mods:
181
- #################################
182
- arr = params.diff_search_options.rstrip.split(/\s+/)
183
- # [aa.to_sym, diff.to_f]
184
- variable_mods = []
185
- (0...arr.size).step(2) do |i|
186
- if arr[i].to_f != 0.0
187
- variable_mods << [arr[i+1], arr[i].to_f]
188
- end
189
- end
190
- mod_objects = []
191
- variable_mods.each do |mod|
192
- mod[0].split('').each do |aa|
193
- hash = {
194
-
195
- :aminoacid => aa,
196
- :massdiff => mod[1],
197
- :mass => aa_hash[aa.to_sym] + mod[1],
198
- :variable => 'Y',
199
- :binary => 'N',
200
- :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
201
- }
202
- mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
203
- end
204
- end
205
- variable_mods = mod_objects
206
- #################################
207
- # TERMINAL Variable Mods:
208
- #################################
209
- # These are always peptide, not protein termini (for sequest)
210
- (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
211
-
212
- to_add = []
213
- if nterm_diff != 0.0
214
- to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
215
- end
216
- if cterm_diff != 0.0
217
- to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
218
- end
219
-
220
- variable_terminal_mods = to_add.map do |term, mssdiff, symb|
221
- hash = {
222
- :terminus => term,
223
- :massdiff => mssdiff,
224
- :variable => 'Y',
225
- :symbol => symb,
226
- }
227
- Ms::Ident::Pepxml::TerminalModification.new(hash)
228
- end
229
-
230
- #########################
231
- # COLLECT THEM
232
- #########################
233
- @aminoacid_modifications = static_mods + variable_mods
234
- @terminal_modifications = static_terminal_mods + variable_terminal_mods
235
- end
236
- end
237
-