ms-ident 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +31 -0
- data/Gemfile.lock +32 -0
- data/LICENSE +61 -0
- data/README.rdoc +97 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/lib/merge.rb +7 -0
- data/lib/ms/ident/pepxml/modifications/sequest.rb +237 -0
- data/lib/ms/ident/pepxml/modifications.rb +94 -0
- data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
- data/lib/ms/ident/pepxml/msms_run_summary.rb +81 -0
- data/lib/ms/ident/pepxml/parameters.rb +14 -0
- data/lib/ms/ident/pepxml/pep_summary.rb +104 -0
- data/lib/ms/ident/pepxml/prot_summary.rb +484 -0
- data/lib/ms/ident/pepxml/sample_enzyme.rb +166 -0
- data/lib/ms/ident/pepxml/search_database.rb +42 -0
- data/lib/ms/ident/pepxml/search_hit/modification_info.rb +82 -0
- data/lib/ms/ident/pepxml/search_hit.rb +141 -0
- data/lib/ms/ident/pepxml/search_result.rb +28 -0
- data/lib/ms/ident/pepxml/search_summary.rb +88 -0
- data/lib/ms/ident/pepxml/spectrum_query.rb +83 -0
- data/lib/ms/ident/pepxml.rb +61 -0
- data/lib/ms/ident.rb +11 -0
- data/schema/pepXML_v115.xsd +1458 -0
- data/schema/pepXML_v19.xsd +1337 -0
- data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
- data/spec/ms/ident/pepxml_spec.rb +436 -0
- data/spec/spec_helper.rb +40 -0
- metadata +194 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
source "http://rubygems.org"

# Runtime dependencies: needed by anyone who uses the gem.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem 'nokogiri'
gem 'ms-core', ">= 0.0.12"
gem 'andand'

# Gems needed to work on the gem itself (rake, specs, coverage, packaging),
# keyed by gem name with the version requirement as the value.
dev_gems = {
  "spec-more" => ">= 0.0.4",
  "bundler" => "~> 1.0.0",
  "jeweler" => "~> 1.5.2",
  "rcov" => ">= 0",
}

# Both development groups receive every entry of dev_gems;
# :development_large additionally declares ms-testdata.
[:development, :development_large].each do |group_name|
  group group_name do
    dev_gems.each { |gem_name, requirement| gem gem_name, requirement }
    gem "ms-testdata", ">= 0.1.1" if group_name == :development_large
  end
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
andand (1.3.1)
|
5
|
+
bacon (1.1.0)
|
6
|
+
bio (1.4.1)
|
7
|
+
git (1.2.5)
|
8
|
+
jeweler (1.5.2)
|
9
|
+
bundler (~> 1.0.0)
|
10
|
+
git (>= 1.2.5)
|
11
|
+
rake
|
12
|
+
ms-core (0.0.12)
|
13
|
+
bio (>= 1.4.1)
|
14
|
+
ms-testdata (0.1.1)
|
15
|
+
nokogiri (1.4.4)
|
16
|
+
rake (0.8.7)
|
17
|
+
rcov (0.9.9)
|
18
|
+
spec-more (0.0.4)
|
19
|
+
bacon
|
20
|
+
|
21
|
+
PLATFORMS
|
22
|
+
ruby
|
23
|
+
|
24
|
+
DEPENDENCIES
|
25
|
+
andand
|
26
|
+
bundler (~> 1.0.0)
|
27
|
+
jeweler (~> 1.5.2)
|
28
|
+
ms-core (>= 0.0.12)
|
29
|
+
ms-testdata (>= 0.1.1)
|
30
|
+
nokogiri
|
31
|
+
rcov
|
32
|
+
spec-more (>= 0.0.4)
|
data/LICENSE
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
Work on pepxml is from the original mspire library, whose license is
|
2
|
+
duplicated here:
|
3
|
+
===============================================================================
|
4
|
+
Copyright (c) 2006, The University of Texas at Austin("U.T. Austin"). All
|
5
|
+
rights reserved.
|
6
|
+
|
7
|
+
Software by John T. Prince under the direction of Edward M. Marcotte.
|
8
|
+
|
9
|
+
By using this software the USER indicates that he or she has read, understood
|
10
|
+
and will comply with the following:
|
11
|
+
|
12
|
+
U. T. Austin hereby grants USER permission to use, copy, modify, merge,
|
13
|
+
publish, distribute, sublicense, and/or sell copies of this software and its
|
14
|
+
documentation for any purpose and without fee, provided that a full copy of
|
15
|
+
this notice is included with the software and its documentation.
|
16
|
+
|
17
|
+
Title to copyright this software and its associated documentation shall at all
|
18
|
+
times remain with U. T. Austin. No right is granted to use in advertising,
|
19
|
+
publicity or otherwise any trademark, service mark, or the name of U. T.
|
20
|
+
Austin.
|
21
|
+
|
22
|
+
This software and any associated documentation are provided "as is," and U. T.
|
23
|
+
AUSTIN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESSED OR IMPLIED, INCLUDING
|
24
|
+
THOSE OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT USE OF
|
25
|
+
THE SOFTWARE, MODIFICATIONS, OR ASSOCIATED DOCUMENTATION WILL NOT INFRINGE ANY
|
26
|
+
PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER INTELLECTUAL PROPERTY RIGHTS OF A
|
27
|
+
THIRD PARTY. U. T. Austin, The University of Texas System, its Regents,
|
28
|
+
officers, and employees shall not be liable under any circumstances for any
|
29
|
+
direct, indirect, special, incidental, or consequential damages with respect
|
30
|
+
to any claim by USER or any third party on account of or arising from the use,
|
31
|
+
or inability to use, this software or its associated documentation, even if U.
|
32
|
+
T. Austin has been advised of the possibility of those damages.
|
33
|
+
|
34
|
+
Submit software operation questions to: Edward M. Marcotte, Department of
|
35
|
+
Chemistry and Biochemistry, U. T. Austin, Austin, Texas 78712.
|
36
|
+
===============================================================================
|
37
|
+
|
38
|
+
The rest of the work (and modifications) are licensed here:
|
39
|
+
===============================================================================
|
40
|
+
Copyright (c) 2011 Brigham Young University
|
41
|
+
Author: John T. Prince
|
42
|
+
|
43
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
44
|
+
a copy of this software and associated documentation files (the
|
45
|
+
"Software"), to deal in the Software without restriction, including
|
46
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
47
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
48
|
+
permit persons to whom the Software is furnished to do so, subject to
|
49
|
+
the following conditions:
|
50
|
+
|
51
|
+
The above copyright notice and this permission notice shall be
|
52
|
+
included in all copies or substantial portions of the Software.
|
53
|
+
|
54
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
55
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
56
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
57
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
58
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
59
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
60
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
61
|
+
===============================================================================
|
data/README.rdoc
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
= ms-ident
|
2
|
+
|
3
|
+
Working with mass spectrometry based peptide/protein identifications. Includes support for building pepxml files.
|
4
|
+
|
5
|
+
Planned support for mzIdentML and reading pepxml.
|
6
|
+
|
7
|
+
== Synopsis
|
8
|
+
|
9
|
+
=== Generating a pepxml file
|
10
|
+
|
11
|
+
This example shows a very block oriented way of constructing a pepxml.
|
12
|
+
Objects or empty data structures are passed into blocks for subcategories to
|
13
|
+
use. Because there are a lot of attributes to manage, most objects accept them upon
|
14
|
+
initialization or later with 'merge!'
|
15
|
+
|
16
|
+
pepxml = Pepxml.new do |msms_pipeline_analysis|
|
17
|
+
msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
|
18
|
+
# prep the sample enzyme and search_summary
|
19
|
+
msms_run_summary.merge!(
|
20
|
+
:base_name => '/home/jtprince/dev/mspire/020',
|
21
|
+
:ms_manufacturer => 'Thermo',
|
22
|
+
:ms_model => 'LTQ Orbitrap',
|
23
|
+
:ms_ionization => 'ESI',
|
24
|
+
:ms_mass_analyzer => 'Ion Trap',
|
25
|
+
:ms_detector => 'UNKNOWN'
|
26
|
+
) do |sample_enzyme, search_summary, spectrum_queries|
|
27
|
+
sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
|
28
|
+
search_summary.merge!(
|
29
|
+
:base_name=>'/path/to/file/020',
|
30
|
+
:search_engine => 'SEQUEST',
|
31
|
+
:precursor_mass_type =>'monoisotopic',
|
32
|
+
:fragment_mass_type => 'average'
|
33
|
+
) do |search_database, enzymatic_search_constraint, modifications, parameters|
|
34
|
+
search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
|
35
|
+
enzymatic_search_constraint.merge!(
|
36
|
+
:enzyme => 'Trypsin',
|
37
|
+
:max_num_internal_cleavages => 2,
|
38
|
+
:min_number_termini => 2
|
39
|
+
)
|
40
|
+
modifications << Pepxml::AminoacidModification.new(
|
41
|
+
:aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
|
42
|
+
:variable => 'Y', :symbol => '*')
|
43
|
+
# invented, for example, a protein terminating mod
|
44
|
+
modifications << Pepxml::TerminalModification.new(
|
45
|
+
:terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333,
|
46
|
+
:variable => 'Y', :symbol => '[', :protein_terminus => 'c',
|
47
|
+
:description => 'leave protein_terminus off if not protein mod'
|
48
|
+
)
|
49
|
+
modifications << Pepxml::TerminalModification.new(
|
50
|
+
:terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322,
|
51
|
+
:variable => 'N', :symbol => ']', :description => 'example: c term mod'
|
52
|
+
)
|
53
|
+
parameters.merge!(
|
54
|
+
:fragment_ion_tolerance => 1.0000,
|
55
|
+
:digest_mass_range => '600.0 3500.0',
|
56
|
+
:enzyme_info => 'Trypsin(KR/P) 1 1 KR P', # etc....
|
57
|
+
)
|
58
|
+
end
|
59
|
+
spectrum_query1 = Pepxml::SpectrumQuery.new(
|
60
|
+
:spectrum => '020.3.3.1', :start_scan => 3, :end_scan => 3,
|
61
|
+
:precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
|
62
|
+
) do |search_results|
|
63
|
+
search_result1 = Pepxml::SearchResult.new do |search_hits|
|
64
|
+
modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
|
65
|
+
Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
|
66
|
+
end
|
67
|
+
# order(modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
|
68
|
+
# or can be set by hash
|
69
|
+
mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
|
70
|
+
search_hit1 = Pepxml::SearchHit.new(
|
71
|
+
:hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
|
72
|
+
:protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
|
73
|
+
:tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
|
74
|
+
:num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
|
75
|
+
:modification_info => mod_info) do |search_scores|
|
76
|
+
search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
|
77
|
+
:spscore => 29.85, :sprank => 1)
|
78
|
+
end
|
79
|
+
search_hits << search_hit1
|
80
|
+
end
|
81
|
+
search_results << search_result1
|
82
|
+
end
|
83
|
+
spectrum_queries << spectrum_query1
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
puts pepxml.to_xml
|
88
|
+
|
89
|
+
The block is optional in initialization or with merge!. You can just as easily
|
90
|
+
set the needed attributes directly.
|
91
|
+
|
92
|
+
msms_run_summary.new(:search_summary => my_search_summary, :spectrum_queries => spec_queries)
|
93
|
+
msms_run_summary.sample_enzyme = sample_enzyme_object
|
94
|
+
|
95
|
+
== Copyright
|
96
|
+
|
97
|
+
see LICENSE
|
data/Rakefile
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups. If Bundler cannot satisfy
# them, print its message plus a hint and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => setup_error
  $stderr.puts setup_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit setup_error.status_code
end
require 'rake'

# Gem specification and packaging tasks provided by Jeweler.
require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name = "ms-ident"
  spec.homepage = "http://github.com/jtprince/ms-ident"
  spec.license = "MIT"
  spec.summary = %Q{mspire library for working with mzIdentML and pepxml}
  spec.description = %Q{mspire library for working with mzIdentML and pepxml}
  spec.email = "jtprince@gmail.com"
  spec.authors = ["John T. Prince"]
  spec.rubyforge_project = 'mspire'
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  spec.add_runtime_dependency 'jabber4r', '> 0.1'
  #  spec.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every *_spec.rb file with lib/ and spec/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |test_task|
  test_task.libs << 'lib' << 'spec'
  test_task.pattern = 'spec/**/*_spec.rb'
  test_task.verbose = true
end

# Coverage run over the same spec files.
require 'rcov/rcovtask'
Rcov::RcovTask.new do |coverage_task|
  coverage_task.libs << 'spec'
  coverage_task.pattern = 'spec/**/*_spec.rb'
  coverage_task.verbose = true
end

task :default => :spec

# RDoc generation; the title carries the contents of VERSION when that file exists.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc_task|
  version_text =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  rdoc_task.rdoc_dir = 'rdoc'
  rdoc_task.title = "ms-ident #{version_text}"
  rdoc_task.rdoc_files.include('README*')
  rdoc_task.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
data/lib/merge.rb
ADDED
@@ -0,0 +1,237 @@
|
|
1
|
+
require 'ms/ident/pepxml/modifications'
|
2
|
+
require 'ms/ident/pepxml/search_hit/modification_info'
|
3
|
+
|
4
|
+
module Ms ; end
|
5
|
+
module Ms::Ident ; end
|
6
|
+
class Ms::Ident::Pepxml ; end
|
7
|
+
|
8
|
+
module Ms::Ident::Pepxml::Modifications
|
9
|
+
# Handles modifications for sequest style searches
|
10
|
+
class Sequest
|
11
|
+
include Ms::Ident::Pepxml::Modifications
|
12
|
+
|
13
|
+
# a hash of all differential modifications present by aa_one_letter_symbol
|
14
|
+
# and special_symbol. This is NOT the mass difference but the total mass {
|
15
|
+
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
16
|
+
# the amino acid sequence, they are given the *differential* mass. The
|
17
|
+
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
18
|
+
# cterminus ']' => 14.55 # nterminus
|
19
|
+
attr_accessor :masses_by_diff_mod_hash
|
20
|
+
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
21
|
+
# values are the special_symbols
|
22
|
+
attr_accessor :mod_symbols_hash
|
23
|
+
|
24
|
+
# sequest params object
|
25
|
+
attr_accessor :params
|
26
|
+
|
27
|
+
|
28
|
+
# The modification symbols string looks like this:
|
29
|
+
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
30
|
+
# ct is cterminal peptide (differential)
|
31
|
+
# nt is nterminal peptide (differential)
|
32
|
+
# the C is just cysteine
|
33
|
+
# will set_modifications and masses_by_diff_mod hash
|
34
|
+
def initialize(params=nil, modification_symbols_string='')
  # Keep the sequest params object (possibly nil). The modification lists
  # and lookup hashes are only built when a params object was supplied.
  @params = params
  set_modifications(params, modification_symbols_string) if @params
end
|
40
|
+
|
41
|
+
# sets @masses_by_diff_mod and @mod_symbols_hash from the modification symbols string
|
42
|
+
def set_hashes(modification_symbols_string)
  # Rebuilds both lookup tables from a string such as
  #   "(M* +15.90000) (ST@ +80.00000) (ct[ +12.33000) (nt] +14.20000)"
  #
  # @masses_by_diff_mod: 'M*' => residue mass + difference; for the termini
  #                      ('ct'/'nt') the symbol alone maps to the difference.
  # @mod_symbols_hash:   [:M, difference] (or [:ct, difference]) => symbol.
  #
  # Returns nil when the string is nil or empty (both tables are then empty).
  @mod_symbols_hash = {}
  @masses_by_diff_mod = {}
  return nil if modification_symbols_string.nil? || modification_symbols_string == ''

  residue_masses = @params.mass_table
  # Splitting on ") (" leaves entries like "(M* +15.90000" or "ct[ +12.33000".
  modification_symbols_string.split(/\)\s+\(/).each do |entry|
    match = entry.match(/\(?(\w+)(.) (.[\d\.]+)\)?/)
    next unless match

    targets = match[1]
    symbol = match[2]
    difference = match[3].to_f

    if targets == 'ct' || targets == 'nt'
      # termini: keyed by the bare symbol, value is the differential mass
      @masses_by_diff_mod[symbol] = difference
      @mod_symbols_hash[[targets.to_sym, difference]] = symbol.dup
    else
      # one or more residues sharing the same symbol and mass difference
      shared_symbol = symbol.dup
      targets.split('').each do |residue|
        residue_sym = residue.to_sym
        @masses_by_diff_mod[residue + shared_symbol] = difference + residue_masses[residue_sym]
        @mod_symbols_hash[[residue_sym, difference]] = shared_symbol
      end
    end
  end
end
|
68
|
+
|
69
|
+
# given a bare peptide (no end pieces) returns a ModificationInfo object
|
70
|
+
# e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
|
71
|
+
# if there are no modifications, returns nil
|
72
|
+
def modification_info(peptide)
  # Builds a ModificationInfo for a bare peptide carrying sequest
  # modification symbols (e.g. "]PEPT*IDE", NOT "K.PEPTIDE.R").
  #
  # peptide - String; it is never modified by this method.
  #
  # Returns a ModificationInfo built from a hash with :modified_peptide and,
  # when present, :mod_nterm_mass, :mod_cterm_mass and :mod_aminoacid_masses.
  # Returns nil when no differential modifications are known or none of them
  # occurs in the peptide.
  hsh = @masses_by_diff_mod
  return nil if hsh.nil? || hsh.size == 0

  hash = { :modified_peptide => peptide.dup }
  table = @params.mass_table
  h = table[:h]  # this? or h_plus ??
  oh = table[:o] + h

  # Work on our own copy so that stripping terminal symbols cannot touch
  # the caller's string.
  sequence = peptide.dup

  ## only the termini can match a single char
  if hsh.key? sequence[0,1]
    # AA + H + differential_mod
    hash[:mod_nterm_mass] = table[sequence[1,1].to_sym] + h + hsh[sequence[0,1]]
    sequence = sequence[1...(sequence.size)]
  end
  if hsh.key? sequence[(sequence.size-1),1]
    # AA + OH + differential_mod
    hash[:mod_cterm_mass] = table[sequence[(sequence.size-2),1].to_sym] + oh + hsh[sequence[-1,1]]
    # drop only the trailing terminal symbol; the residues must survive for
    # the amino acid scan below
    sequence = sequence[0...(sequence.size-1)]
  end

  # Positions are 1-based residue positions in the unmodified sequence, so
  # modification symbols that precede a residue must not be counted.
  mod_array = []
  position = 0
  sequence.split('').each_with_index do |char, i|
    next unless char =~ /[A-Za-z]/
    position += 1
    key = sequence[i,2]  # residue plus the character following it
    if hsh.key? key
      mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(position, hsh[key])
    end
  end
  hash[:mod_aminoacid_masses] = mod_array if mod_array.size > 0

  if hash.size > 1  # if there is more than just the modified peptide there
    Ms::Ident::Pepxml::SearchHit::ModificationInfo.new(hash)
  else
    nil
  end
end
|
110
|
+
|
111
|
+
# returns an array of static mod objects and static terminal mod objects
|
112
|
+
def create_static_mods(params)
|
113
|
+
|
114
|
+
####################################
|
115
|
+
## static mods
|
116
|
+
####################################
|
117
|
+
|
118
|
+
static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
|
119
|
+
static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
|
120
|
+
|
121
|
+
params.mods.each do |k,v|
|
122
|
+
v_to_f = v.to_f
|
123
|
+
if v_to_f != 0.0
|
124
|
+
if k =~ /add_(\w)_/
|
125
|
+
static_mods << [$1.to_sym, v_to_f]
|
126
|
+
else
|
127
|
+
static_terminal_mods << [k, v_to_f]
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
aa_hash = params.mass_table
|
132
|
+
|
133
|
+
## Create the static_mods objects
|
134
|
+
static_mods.map! do |mod|
|
135
|
+
hash = {
|
136
|
+
:aminoacid => mod[0].to_s,
|
137
|
+
:massdiff => mod[1],
|
138
|
+
:mass => aa_hash[mod[0]] + mod[1],
|
139
|
+
:variable => 'N',
|
140
|
+
:binary => 'Y',
|
141
|
+
}
|
142
|
+
Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
143
|
+
end
|
144
|
+
|
145
|
+
## Create the static_terminal_mods objects
|
146
|
+
static_terminal_mods.map! do |mod|
|
147
|
+
terminus = if mod[0] =~ /Cterm/ ; 'c'
|
148
|
+
else ; 'n' # only two possible termini
|
149
|
+
end
|
150
|
+
protein_terminus = case mod[0]
|
151
|
+
when /Nterm_protein/ ; 'n'
|
152
|
+
when /Cterm_protein/ ; 'c'
|
153
|
+
else nil
|
154
|
+
end
|
155
|
+
|
156
|
+
# create the hash
|
157
|
+
hash = {
|
158
|
+
:terminus => terminus,
|
159
|
+
:massdiff => mod[1],
|
160
|
+
:variable => 'N',
|
161
|
+
:description => mod[0],
|
162
|
+
}
|
163
|
+
hash[:protein_terminus] = protein_terminus if protein_terminus
|
164
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
165
|
+
end
|
166
|
+
[static_mods, static_terminal_mods]
|
167
|
+
end
|
168
|
+
|
169
|
+
# 1. sets aminoacid_modifications and terminal_modifications from a sequest params object
|
170
|
+
# 2. sets @params
|
171
|
+
# 3. sets @masses_by_diff_mod
|
172
|
+
def set_modifications(params, modification_symbols_string)
|
173
|
+
@params = params
|
174
|
+
|
175
|
+
set_hashes(modification_symbols_string)
|
176
|
+
(static_mods, static_terminal_mods) = create_static_mods(params)
|
177
|
+
|
178
|
+
aa_hash = params.mass_table
|
179
|
+
#################################
|
180
|
+
# Variable Mods:
|
181
|
+
#################################
|
182
|
+
arr = params.diff_search_options.rstrip.split(/\s+/)
|
183
|
+
# [aa.to_sym, diff.to_f]
|
184
|
+
variable_mods = []
|
185
|
+
(0...arr.size).step(2) do |i|
|
186
|
+
if arr[i].to_f != 0.0
|
187
|
+
variable_mods << [arr[i+1], arr[i].to_f]
|
188
|
+
end
|
189
|
+
end
|
190
|
+
mod_objects = []
|
191
|
+
variable_mods.each do |mod|
|
192
|
+
mod[0].split('').each do |aa|
|
193
|
+
hash = {
|
194
|
+
|
195
|
+
:aminoacid => aa,
|
196
|
+
:massdiff => mod[1],
|
197
|
+
:mass => aa_hash[aa.to_sym] + mod[1],
|
198
|
+
:variable => 'Y',
|
199
|
+
:binary => 'N',
|
200
|
+
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
201
|
+
}
|
202
|
+
mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
203
|
+
end
|
204
|
+
end
|
205
|
+
variable_mods = mod_objects
|
206
|
+
#################################
|
207
|
+
# TERMINAL Variable Mods:
|
208
|
+
#################################
|
209
|
+
# These are always peptide, not protein termini (for sequest)
|
210
|
+
(nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
|
211
|
+
|
212
|
+
to_add = []
|
213
|
+
if nterm_diff != 0.0
|
214
|
+
to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
|
215
|
+
end
|
216
|
+
if cterm_diff != 0.0
|
217
|
+
to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
|
218
|
+
end
|
219
|
+
|
220
|
+
variable_terminal_mods = to_add.map do |term, mssdiff, symb|
|
221
|
+
hash = {
|
222
|
+
:terminus => term,
|
223
|
+
:massdiff => mssdiff,
|
224
|
+
:variable => 'Y',
|
225
|
+
:symbol => symb,
|
226
|
+
}
|
227
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
228
|
+
end
|
229
|
+
|
230
|
+
#########################
|
231
|
+
# COLLECT THEM
|
232
|
+
#########################
|
233
|
+
@aminoacid_modifications = static_mods + variable_mods
|
234
|
+
@terminal_modifications = static_terminal_mods + variable_terminal_mods
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'merge'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Ms ; end
module Ms::Ident ; end
class Ms::Ident::Pepxml ; end

# An Array holding AminoacidModification and TerminalModification objects.
class Ms::Ident::Pepxml::Modifications < Array
  # Asks every contained modification to write itself to the builder, in
  # array order.
  #
  # builder - optional XML builder; a new Nokogiri::XML::Builder is created
  #           when none is given.
  #
  # Returns the builder that was passed in, or otherwise the XML string of
  # the root of the newly built document.
  def to_xml(builder=nil)
    target = builder ? builder : Nokogiri::XML::Builder.new
    each do |modification|
      modification.to_xml(target)
    end
    return builder if builder
    target.doc.root.to_xml
  end
end
|
18
|
+
|
19
|
+
# Modified aminoacid, static or variable
|
20
|
+
# unless otherwise stated, all attributes can be anything
|
21
|
+
class Ms::Ident::Pepxml::AminoacidModification
  include Merge
  # One letter code of the amino acid
  attr_accessor :aminoacid
  # Float: mass difference relative to the unmodified aminoacid
  attr_accessor :massdiff
  # Float: mass of the modified aminoacid
  attr_accessor :mass
  # 'Y' when both the modified and the unmodified aminoacid may occur in the
  # dataset, 'N' when only the modified aminoacid can be present
  attr_accessor :variable
  # whether modification can reside only at protein terminus (specified 'n',
  # 'c', or 'nc')
  attr_accessor :peptide_terminus
  # Symbol the search engine uses for this modification
  attr_accessor :symbol
  # 'Y' if each peptide must have only modified or unmodified aminoacid, 'N'
  # if a peptide may contain both modified and unmodified aminoacid
  attr_accessor :binary

  # hash - attribute name => value pairs, handed to merge!
  def initialize(hash={})
    merge!(hash)
  end

  # Writes an aminoacid_modification element carrying every attribute that
  # is set (nil/false values are left out).
  #
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    target = builder ? builder : Nokogiri::XML::Builder.new
    present = {}
    [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].each do |name|
      value = send(name)
      present[name] = value if value
    end
    # note massdiff: must begin with either + (nonnegative) or - [e.g.
    # +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
    # Ms::Ident::Pepxml
    present[:massdiff] = present[:massdiff].to_plus_minus_string
    target.aminoacid_modification(present)
    return builder if builder
    target.doc.root.to_xml
  end
end
|
58
|
+
|
59
|
+
# Modified terminus, static or variable
|
60
|
+
class Ms::Ident::Pepxml::TerminalModification
  include Merge
  # n for N-terminus, c for C-terminus
  attr_accessor :terminus
  # Mass difference with respect to unmodified terminus
  attr_accessor :massdiff
  # Mass of modified terminus
  attr_accessor :mass
  # Y if both modified and unmodified terminus could be present in the
  # dataset, N if only modified terminus can be present
  attr_accessor :variable
  # Special symbol used by search engine to designate this modification
  attr_accessor :symbol
  # whether modification can reside only at protein terminus (specified n or
  # c)
  attr_accessor :protein_terminus
  attr_accessor :description

  # hash - attribute name => value pairs; each pair is assigned through the
  #        matching writer, so an unknown key raises NoMethodError.
  def initialize(hash={})
    hash.each {|k,v| send("#{k}=", v) }
  end

  # Writes a terminal_modification element carrying every listed attribute
  # that is set (nil/false values are left out).
  #
  # returns the builder or an xml string if no builder supplied
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    #short_element_xml_from_instance_vars("terminal_modification")
    # NOTE(review): :symbol is stored by this class but is not part of the
    # serialized attribute list -- confirm whether that is intentional.
    #
    # compact drops the nil produced for every unset attribute; Hash[] must
    # only be given [key, value] pairs.
    attrs = [:terminus, :massdiff, :mass, :variable, :protein_terminus, :description].map {|at| v=send(at) ; [at,v] if v }.compact
    hash = Hash[attrs]
    # massdiff must begin with either + (nonnegative) or -
    hash[:massdiff] = hash[:massdiff].to_plus_minus_string
    xmlb.terminal_modification(hash)
    builder || xmlb.doc.root.to_xml
  end
end
|
93
|
+
|
94
|
+
|