ms-sequest 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +26 -10
- data/Gemfile +4 -1
- data/Gemfile.lock +17 -2
- data/VERSION +1 -1
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +1 -1
- data/lib/ms/sequest/bioworks.rb +2 -2
- data/lib/ms/sequest/params.rb +0 -20
- data/lib/ms/sequest/pepxml.rb +7 -245
- data/lib/ms/sequest/pepxml/modifications.rb +247 -0
- data/lib/ms/sequest/pepxml/params.rb +32 -0
- data/lib/ms/sequest/sqt.rb +17 -17
- data/lib/ms/sequest/srf.rb +64 -54
- data/lib/ms/sequest/srf/pepxml.rb +316 -0
- data/lib/ms/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/ms/sequest/srf/sqt.rb +1 -1
- data/spec/ms/sequest/bioworks_spec.rb +11 -11
- data/spec/ms/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/ms/sequest/pepxml_spec.rb +0 -65
- data/spec/ms/sequest/srf/pepxml_spec.rb +84 -0
- data/spec/ms/sequest/srf_spec.rb +3 -3
- data/spec/ms/sequest/srf_spec_helper.rb +2 -2
- data/spec/spec_helper.rb +17 -18
- metadata +73 -19
data/.autotest
CHANGED
@@ -1,14 +1,30 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require 'rubygems'
|
4
|
+
require 'spec/more'
|
5
|
+
require 'autotest/bacon'
|
6
|
+
#require 'redgreen/autotest'
|
6
7
|
|
7
|
-
Autotest
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
}
|
8
|
+
class Autotest::Bacon < Autotest
|
9
|
+
undef make_test_cmd
|
10
|
+
def make_test_cmd(files_to_test)
|
11
|
+
args = files_to_test.keys.flatten.join(' ')
|
12
|
+
args = '-a' if args.empty?
|
13
|
+
# TODO : make regex to pass to -n using values
|
14
|
+
"#{ruby} -S bacon -I#{libs} -o TestUnit #{args}"
|
15
|
+
end
|
14
16
|
end
|
17
|
+
|
18
|
+
|
19
|
+
#Autotest.add_hook :initialize do |at|
|
20
|
+
# at.clear_mappings
|
21
|
+
#end
|
22
|
+
|
23
|
+
#Autotest.add_hook :initialize do |at|
|
24
|
+
#at.add_mapping(%r%^lib/(.*)\.rb$%) { |_, m|
|
25
|
+
# #["spec/#{m[1]}_spec.rb"]
|
26
|
+
# #["test/#{m[1]}_test.rb"]
|
27
|
+
# ## for both specs and tests:
|
28
|
+
# ["spec/#{m[1]}_spec.rb"]
|
29
|
+
#}
|
30
|
+
#end
|
data/Gemfile
CHANGED
@@ -2,8 +2,11 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies required to use your gem here.
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
|
-
gem "ms-
|
5
|
+
gem "ms-ident", ">= 0.0.17"
|
6
|
+
gem "ms-core", ">= 0.0.14"
|
6
7
|
gem "arrayclass", ">= 0.1.0"
|
8
|
+
gem "ms-msrun", ">= 0.3.3"
|
9
|
+
gem "trollop", "~> 1.16"
|
7
10
|
|
8
11
|
# Add dependencies to develop your gem here.
|
9
12
|
# Include everything needed to run rake, tests, features, etc.
|
data/Gemfile.lock
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
andand (1.3.1)
|
4
5
|
arrayclass (0.1.1)
|
5
6
|
bacon (1.1.0)
|
6
7
|
bio (1.4.1)
|
@@ -9,13 +10,24 @@ GEM
|
|
9
10
|
bundler (~> 1.0.0)
|
10
11
|
git (>= 1.2.5)
|
11
12
|
rake
|
12
|
-
ms-core (0.0.
|
13
|
+
ms-core (0.0.14)
|
13
14
|
bio (>= 1.4.1)
|
15
|
+
ms-ident (0.0.17)
|
16
|
+
andand
|
17
|
+
ms-core (>= 0.0.12)
|
18
|
+
nokogiri
|
19
|
+
ms-msrun (0.3.3)
|
20
|
+
ms-core (>= 0.0.3)
|
21
|
+
narray
|
22
|
+
nokogiri
|
14
23
|
ms-testdata (0.1.1)
|
24
|
+
narray (0.5.9.9)
|
25
|
+
nokogiri (1.4.4)
|
15
26
|
rake (0.8.7)
|
16
27
|
rcov (0.9.9)
|
17
28
|
spec-more (0.0.4)
|
18
29
|
bacon
|
30
|
+
trollop (1.16.2)
|
19
31
|
|
20
32
|
PLATFORMS
|
21
33
|
ruby
|
@@ -24,7 +36,10 @@ DEPENDENCIES
|
|
24
36
|
arrayclass (>= 0.1.0)
|
25
37
|
bundler (~> 1.0.0)
|
26
38
|
jeweler (~> 1.5.2)
|
27
|
-
ms-core (>= 0.0.
|
39
|
+
ms-core (>= 0.0.14)
|
40
|
+
ms-ident (>= 0.0.17)
|
41
|
+
ms-msrun (>= 0.3.3)
|
28
42
|
ms-testdata (>= 0.1.1)
|
29
43
|
rcov
|
30
44
|
spec-more
|
45
|
+
trollop (~> 1.16)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.17
|
1
|
+
0.0.18
|
data/bin/srf_to_search.rb
CHANGED
data/lib/ms/sequest/bioworks.rb
CHANGED
@@ -27,7 +27,7 @@ class Bioworks
|
|
27
27
|
@@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
|
28
28
|
|
29
29
|
|
30
|
-
attr_accessor :peps, :
|
30
|
+
attr_accessor :peps, :proteins, :version, :global_filename, :origfilename, :origfilepath
|
31
31
|
# a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
|
32
32
|
attr_accessor :modifications
|
33
33
|
|
@@ -42,7 +42,7 @@ class Bioworks
|
|
42
42
|
|
43
43
|
# returns the number of prots. Raises an Exception if open and closing xml
|
44
44
|
# tags don't agree
|
45
|
-
def
|
45
|
+
def num_proteins(file)
|
46
46
|
re = /(<protein>)|(<\/protein>)/mo
|
47
47
|
begin_tags = 0
|
48
48
|
end_tags = 0
|
data/lib/ms/sequest/params.rb
CHANGED
@@ -269,26 +269,6 @@ class Ms::Sequest::Params
|
|
269
269
|
return "1"
|
270
270
|
end
|
271
271
|
|
272
|
-
## returns a SampleEnzyme object
|
273
|
-
#def sample_enzyme
|
274
|
-
# (offset, cleave_at, except_if_after) = enzyme_specificity.map do |v|
|
275
|
-
# if v == '' ; nil ; else v end
|
276
|
-
# end
|
277
|
-
# SampleEnzyme.new do |se|
|
278
|
-
# se.name = self.enzyme
|
279
|
-
# se.cut = cleave_at
|
280
|
-
# se.no_cut = except_if_after
|
281
|
-
# se.sense =
|
282
|
-
# if se.name == "No_Enzyme"
|
283
|
-
# nil
|
284
|
-
# elsif offset == 1
|
285
|
-
# 'C'
|
286
|
-
# elsif offset == 0
|
287
|
-
# 'N'
|
288
|
-
# end
|
289
|
-
# end
|
290
|
-
#end
|
291
|
-
|
292
272
|
# returns the enzyme name (but no parentheses connected with the name).
|
293
273
|
# this will likely be capitalized.
|
294
274
|
def enzyme
|
data/lib/ms/sequest/pepxml.rb
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
require 'ms/sequest/pepxml/params'
|
2
|
+
require 'ms/sequest/pepxml/modifications'
|
3
|
+
|
4
|
+
|
5
|
+
=begin
|
1
6
|
|
2
7
|
require 'sample_enzyme'
|
3
8
|
require 'ms/parser/mzxml'
|
@@ -861,251 +866,6 @@ class Sequest::PepXML::Parameters
|
|
861
866
|
end
|
862
867
|
end
|
863
868
|
|
864
|
-
class Sequest::PepXML::Modifications
|
865
|
-
include SpecIDXML
|
866
|
-
|
867
|
-
# sequest params object
|
868
|
-
attr_accessor :params
|
869
|
-
# array holding AAModifications
|
870
|
-
attr_accessor :aa_mods
|
871
|
-
# array holding TerminalModifications
|
872
|
-
attr_accessor :term_mods
|
873
|
-
# a hash of all differential modifications present by aa_one_letter_symbol
|
874
|
-
# and special_symbol. This is NOT the mass difference but the total mass {
|
875
|
-
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
876
|
-
# the amino acid sequence, they are give the *differential* mass. The
|
877
|
-
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
878
|
-
# cterminus ']' => 14.55 # nterminus
|
879
|
-
attr_accessor :masses_by_diff_mod_hash
|
880
|
-
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
881
|
-
# values are the special_symbols
|
882
|
-
attr_accessor :mod_symbols_hash
|
883
|
-
|
884
|
-
# The modification symbols string looks like this:
|
885
|
-
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
886
|
-
# ct is cterminal peptide (differential)
|
887
|
-
# nt is nterminal peptide (differential)
|
888
|
-
# the C is just cysteine
|
889
|
-
# will set_modifications and masses_by_diff_mod hash
|
890
|
-
def initialize(params=nil, modification_symbols_string='')
|
891
|
-
@params = params
|
892
|
-
if @params
|
893
|
-
set_modifications(params, modification_symbols_string)
|
894
|
-
end
|
895
|
-
end
|
896
|
-
|
897
|
-
# set the masses_by_diff_mod and mod_symbols_hash from
|
898
|
-
def set_hashes(modification_symbols_string)
|
899
|
-
|
900
|
-
@mod_symbols_hash = {}
|
901
|
-
@masses_by_diff_mod = {}
|
902
|
-
if (modification_symbols_string == nil || modification_symbols_string == '')
|
903
|
-
return nil
|
904
|
-
end
|
905
|
-
table = @params.mass_table
|
906
|
-
modification_symbols_string.split(/\)\s+\(/).each do |mod|
|
907
|
-
if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
|
908
|
-
if $1 == 'ct' || $1 == 'nt'
|
909
|
-
mass_diff = $3.to_f
|
910
|
-
@masses_by_diff_mod[$2] = mass_diff
|
911
|
-
@mod_symbols_hash[[$1.to_sym, mass_diff]] = $2.dup
|
912
|
-
# changed from below to match tests, is this right?
|
913
|
-
# @mod_symbols_hash[[$1, mass_diff]] = $2.dup
|
914
|
-
else
|
915
|
-
symbol_string = $2.dup
|
916
|
-
mass_diff = $3.to_f
|
917
|
-
$1.split('').each do |aa|
|
918
|
-
aa_as_sym = aa.to_sym
|
919
|
-
@masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
|
920
|
-
@mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
|
921
|
-
end
|
922
|
-
end
|
923
|
-
end
|
924
|
-
end
|
925
|
-
end
|
926
|
-
|
927
|
-
# given a bare peptide (no end pieces) returns a ModificationInfo object
|
928
|
-
# e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
|
929
|
-
# if there are no modifications, returns nil
|
930
|
-
def modification_info(peptide)
|
931
|
-
if @masses_by_diff_mod.size == 0
|
932
|
-
return nil
|
933
|
-
end
|
934
|
-
hash = {}
|
935
|
-
hash[:modified_peptide] = peptide.dup
|
936
|
-
hsh = @masses_by_diff_mod
|
937
|
-
table = @params.mass_table
|
938
|
-
h = table[:h] # this? or h_plus ??
|
939
|
-
oh = table[:o] + h
|
940
|
-
## only the termini can match a single char
|
941
|
-
if hsh.key? peptide[0,1]
|
942
|
-
# AA + H + differential_mod
|
943
|
-
hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
|
944
|
-
peptide = peptide[1...(peptide.size)]
|
945
|
-
end
|
946
|
-
if hsh.key? peptide[(peptide.size-1),1]
|
947
|
-
# AA + OH + differential_mod
|
948
|
-
hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
|
949
|
-
peptide.slice!( 0..-2 )
|
950
|
-
peptide = peptide[0...(peptide.size-1)]
|
951
|
-
end
|
952
|
-
mod_array = []
|
953
|
-
(0...peptide.size).each do |i|
|
954
|
-
if hsh.key? peptide[i,2]
|
955
|
-
mod_array << Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
|
956
|
-
end
|
957
|
-
end
|
958
|
-
if mod_array.size > 0
|
959
|
-
hash[:mod_aminoacid_masses] = mod_array
|
960
|
-
end
|
961
|
-
if hash.size > 1 # if there is more than just the modified peptide there
|
962
|
-
Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
|
963
|
-
#Sequest::PepXML::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
|
964
|
-
else
|
965
|
-
nil
|
966
|
-
end
|
967
|
-
end
|
968
|
-
|
969
|
-
# returns an array of static mod objects and static terminal mod objects
|
970
|
-
def create_static_mods(params)
|
971
|
-
|
972
|
-
####################################
|
973
|
-
## static mods
|
974
|
-
####################################
|
975
|
-
|
976
|
-
static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
|
977
|
-
static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
|
978
|
-
|
979
|
-
params.mods.each do |k,v|
|
980
|
-
v_to_f = v.to_f
|
981
|
-
if v_to_f != 0.0
|
982
|
-
if k =~ /add_(\w)_/
|
983
|
-
static_mods << [$1.to_sym, v_to_f]
|
984
|
-
else
|
985
|
-
static_terminal_mods << [k, v_to_f]
|
986
|
-
end
|
987
|
-
end
|
988
|
-
end
|
989
|
-
aa_hash = params.mass_table
|
990
|
-
|
991
|
-
## Create the static_mods objects
|
992
|
-
static_mods.map! do |mod|
|
993
|
-
hash = {
|
994
|
-
:aminoacid => mod[0].to_s,
|
995
|
-
:massdiff => mod[1],
|
996
|
-
:mass => aa_hash[mod[0]] + mod[1],
|
997
|
-
:variable => 'N',
|
998
|
-
:binary => 'Y',
|
999
|
-
}
|
1000
|
-
Sequest::PepXML::AAModification.new(hash)
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
## Create the static_terminal_mods objects
|
1004
|
-
static_terminal_mods.map! do |mod|
|
1005
|
-
terminus = if mod[0] =~ /Cterm/ ; 'c'
|
1006
|
-
else ; 'n' # only two possible termini
|
1007
|
-
end
|
1008
|
-
protein_terminus = case mod[0]
|
1009
|
-
when /Nterm_protein/ ; 'n'
|
1010
|
-
when /Cterm_protein/ ; 'c'
|
1011
|
-
else nil
|
1012
|
-
end
|
1013
|
-
|
1014
|
-
# create the hash
|
1015
|
-
hash = {
|
1016
|
-
:terminus => terminus,
|
1017
|
-
:massdiff => mod[1],
|
1018
|
-
:variable => 'N',
|
1019
|
-
:description => mod[0],
|
1020
|
-
}
|
1021
|
-
hash[:protein_terminus] = protein_terminus if protein_terminus
|
1022
|
-
Sequest::PepXML::TerminalModification.new(hash)
|
1023
|
-
end
|
1024
|
-
[static_mods, static_terminal_mods]
|
1025
|
-
end
|
1026
|
-
|
1027
|
-
# 1. sets aa_mods and term_mods from a sequest params object
|
1028
|
-
# 2. sets @params
|
1029
|
-
# 3. sets @masses_by_diff_mod
|
1030
|
-
def set_modifications(params, modification_symbols_string)
|
1031
|
-
@params = params
|
1032
|
-
|
1033
|
-
set_hashes(modification_symbols_string)
|
1034
|
-
(static_mods, static_terminal_mods) = create_static_mods(params)
|
1035
|
-
|
1036
|
-
aa_hash = params.mass_table
|
1037
|
-
#################################
|
1038
|
-
# Variable Mods:
|
1039
|
-
#################################
|
1040
|
-
arr = params.diff_search_options.rstrip.split(/\s+/)
|
1041
|
-
# [aa.to_sym, diff.to_f]
|
1042
|
-
variable_mods = []
|
1043
|
-
(0...arr.size).step(2) do |i|
|
1044
|
-
if arr[i].to_f != 0.0
|
1045
|
-
variable_mods << [arr[i+1], arr[i].to_f]
|
1046
|
-
end
|
1047
|
-
end
|
1048
|
-
mod_objects = []
|
1049
|
-
variable_mods.each do |mod|
|
1050
|
-
mod[0].split('').each do |aa|
|
1051
|
-
hash = {
|
1052
|
-
|
1053
|
-
:aminoacid => aa,
|
1054
|
-
:massdiff => mod[1],
|
1055
|
-
:mass => aa_hash[aa.to_sym] + mod[1],
|
1056
|
-
:variable => 'Y',
|
1057
|
-
:binary => 'N',
|
1058
|
-
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
1059
|
-
}
|
1060
|
-
mod_objects << Sequest::PepXML::AAModification.new(hash)
|
1061
|
-
end
|
1062
|
-
end
|
1063
|
-
variable_mods = mod_objects
|
1064
|
-
#################################
|
1065
|
-
# TERMINAL Variable Mods:
|
1066
|
-
#################################
|
1067
|
-
# These are always peptide, not protein termini (for sequest)
|
1068
|
-
(nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
|
1069
|
-
|
1070
|
-
to_add = []
|
1071
|
-
if nterm_diff != 0.0
|
1072
|
-
to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
|
1073
|
-
end
|
1074
|
-
if cterm_diff != 0.0
|
1075
|
-
to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
|
1076
|
-
end
|
1077
|
-
|
1078
|
-
variable_terminal_mods = to_add.map do |term, mssdiff, symb|
|
1079
|
-
hash = {
|
1080
|
-
:terminus => term,
|
1081
|
-
:massdiff => mssdiff,
|
1082
|
-
:variable => 'Y',
|
1083
|
-
:symbol => symb,
|
1084
|
-
}
|
1085
|
-
Sequest::PepXML::TerminalModification.new(hash)
|
1086
|
-
end
|
1087
|
-
|
1088
|
-
#########################
|
1089
|
-
# COLLECT THEM
|
1090
|
-
#########################
|
1091
|
-
@aa_mods = static_mods + variable_mods
|
1092
|
-
@term_mods = static_terminal_mods + variable_terminal_mods
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
## Generates the pepxml for static and differential amino acid mods based on
|
1096
|
-
## sequest object
|
1097
|
-
def to_pepxml
|
1098
|
-
st = ''
|
1099
|
-
if @aa_mods
|
1100
|
-
st << @aa_mods.map {|v| v.to_pepxml }.join
|
1101
|
-
end
|
1102
|
-
if @term_mods
|
1103
|
-
st << @term_mods.map {|v| v.to_pepxml }.join
|
1104
|
-
end
|
1105
|
-
st
|
1106
|
-
end
|
1107
|
-
|
1108
|
-
end
|
1109
869
|
|
1110
870
|
# Modified aminoacid, static or variable
|
1111
871
|
# unless otherwise stated, all attributes can be anything
|
@@ -1456,3 +1216,5 @@ class Sequest::PepXML::SearchHit::ModificationInfo
|
|
1456
1216
|
end
|
1457
1217
|
|
1458
1218
|
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = Arrayclass.new(%w(position mass))
|
1219
|
+
|
1220
|
+
=end
|
@@ -0,0 +1,247 @@
|
|
1
|
+
require 'ms/ident/pepxml/search_hit/modification_info'
|
2
|
+
|
3
|
+
module Ms ; end
|
4
|
+
module Ms::Sequest ; end
|
5
|
+
class Ms::Sequest::Pepxml ; end
|
6
|
+
|
7
|
+
class Ms::Sequest::Pepxml::Modifications
|
8
|
+
# sequest params object
|
9
|
+
attr_accessor :params
|
10
|
+
# array holding AAModifications
|
11
|
+
attr_accessor :aa_mods
|
12
|
+
# array holding TerminalModifications
|
13
|
+
attr_accessor :term_mods
|
14
|
+
# a hash of all differential modifications present by aa_one_letter_symbol
|
15
|
+
# and special_symbol. This is NOT the mass difference but the total mass {
|
16
|
+
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
17
|
+
# the amino acid sequence, they are give the *differential* mass. The
|
18
|
+
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
19
|
+
# cterminus ']' => 14.55 # nterminus
|
20
|
+
attr_accessor :aa_mod_to_tot_mass
|
21
|
+
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
22
|
+
# values are the special_symbols
|
23
|
+
attr_accessor :mod_symbols_hash
|
24
|
+
|
25
|
+
# returns an array of all modifications (aa_mods, then term_mods)
|
26
|
+
def modifications
|
27
|
+
aa_mods + term_mods
|
28
|
+
end
|
29
|
+
|
30
|
+
# The modification symbols string looks like this:
|
31
|
+
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
32
|
+
# ct is cterminal peptide (differential)
|
33
|
+
# nt is nterminal peptide (differential)
|
34
|
+
# the C is just cysteine
|
35
|
+
# will set_modifications and aa_mod_to_tot_mass hash
|
36
|
+
def initialize(params=nil, modification_symbols_string='')
|
37
|
+
@params = params
|
38
|
+
if @params
|
39
|
+
set_modifications(params, modification_symbols_string)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# set the aa_mod_to_tot_mass and mod_symbols_hash from
|
44
|
+
def set_hashes(modification_symbols_string)
|
45
|
+
|
46
|
+
@mod_symbols_hash = {}
|
47
|
+
@aa_mod_to_tot_mass = {}
|
48
|
+
if (modification_symbols_string == nil || modification_symbols_string == '')
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
table = @params.mass_index(:precursor)
|
52
|
+
modification_symbols_string.split(/\)\s+\(/).each do |mod|
|
53
|
+
if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
|
54
|
+
if $1 == 'ct' || $1 == 'nt'
|
55
|
+
mass_diff = $3.to_f
|
56
|
+
@aa_mod_to_tot_mass[$2] = mass_diff
|
57
|
+
@mod_symbols_hash[[$1.to_sym, mass_diff]] = $2.dup
|
58
|
+
# changed from below to match tests, is this right?
|
59
|
+
# @mod_symbols_hash[[$1, mass_diff]] = $2.dup
|
60
|
+
else
|
61
|
+
symbol_string = $2.dup
|
62
|
+
mass_diff = $3.to_f
|
63
|
+
$1.split('').each do |aa|
|
64
|
+
aa_as_sym = aa.to_sym
|
65
|
+
@aa_mod_to_tot_mass[aa+symbol_string] = mass_diff + table[aa_as_sym]
|
66
|
+
@mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
# returns an array of static mod objects and static terminal mod objects
|
73
|
+
def create_static_mods(params)
|
74
|
+
|
75
|
+
####################################
|
76
|
+
## static mods
|
77
|
+
####################################
|
78
|
+
|
79
|
+
static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
|
80
|
+
static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
|
81
|
+
|
82
|
+
params.mods.each do |k,v|
|
83
|
+
v_to_f = v.to_f
|
84
|
+
if v_to_f != 0.0
|
85
|
+
if k =~ /add_(\w)_/
|
86
|
+
static_mods << [$1.to_sym, v_to_f]
|
87
|
+
else
|
88
|
+
static_terminal_mods << [k, v_to_f]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
aa_hash = params.mass_index(:precursor)
|
93
|
+
|
94
|
+
## Create the static_mods objects
|
95
|
+
static_mods.map! do |mod|
|
96
|
+
hash = {
|
97
|
+
:aminoacid => mod[0].to_s,
|
98
|
+
:massdiff => mod[1],
|
99
|
+
:mass => aa_hash[mod[0]] + mod[1],
|
100
|
+
:variable => 'N',
|
101
|
+
:binary => 'Y',
|
102
|
+
}
|
103
|
+
Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
104
|
+
end
|
105
|
+
|
106
|
+
## Create the static_terminal_mods objects
|
107
|
+
static_terminal_mods.map! do |mod|
|
108
|
+
terminus = if mod[0] =~ /Cterm/ ; 'c'
|
109
|
+
else ; 'n' # only two possible termini
|
110
|
+
end
|
111
|
+
protein_terminus = case mod[0]
|
112
|
+
when /Nterm_protein/ ; 'n'
|
113
|
+
when /Cterm_protein/ ; 'c'
|
114
|
+
else nil
|
115
|
+
end
|
116
|
+
|
117
|
+
# create the hash
|
118
|
+
hash = {
|
119
|
+
:terminus => terminus,
|
120
|
+
:massdiff => mod[1],
|
121
|
+
:variable => 'N',
|
122
|
+
:description => mod[0],
|
123
|
+
}
|
124
|
+
hash[:protein_terminus] = protein_terminus if protein_terminus
|
125
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
126
|
+
end
|
127
|
+
[static_mods, static_terminal_mods]
|
128
|
+
end
|
129
|
+
|
130
|
+
# 1. sets aa_mods and term_mods from a sequest params object
|
131
|
+
# 2. sets @params
|
132
|
+
# 3. sets @aa_mod_to_tot_mass
|
133
|
+
def set_modifications(params, modification_symbols_string)
|
134
|
+
@params = params
|
135
|
+
|
136
|
+
set_hashes(modification_symbols_string)
|
137
|
+
(static_mods, static_terminal_mods) = create_static_mods(params)
|
138
|
+
|
139
|
+
aa_hash = params.mass_index(:precursor)
|
140
|
+
#################################
|
141
|
+
# Variable Mods:
|
142
|
+
#################################
|
143
|
+
arr = params.diff_search_options.rstrip.split(/\s+/)
|
144
|
+
# [aa.to_sym, diff.to_f]
|
145
|
+
variable_mods = []
|
146
|
+
(0...arr.size).step(2) do |i|
|
147
|
+
if arr[i].to_f != 0.0
|
148
|
+
variable_mods << [arr[i+1], arr[i].to_f]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
mod_objects = []
|
152
|
+
variable_mods.each do |mod|
|
153
|
+
mod[0].split('').each do |aa|
|
154
|
+
hash = {
|
155
|
+
|
156
|
+
:aminoacid => aa,
|
157
|
+
:massdiff => mod[1],
|
158
|
+
:mass => aa_hash[aa.to_sym] + mod[1],
|
159
|
+
:variable => 'Y',
|
160
|
+
:binary => 'N',
|
161
|
+
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
162
|
+
}
|
163
|
+
mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
variable_mods = mod_objects
|
168
|
+
#################################
|
169
|
+
# TERMINAL Variable Mods:
|
170
|
+
#################################
|
171
|
+
# These are always peptide, not protein termini (for sequest)
|
172
|
+
(nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
|
173
|
+
|
174
|
+
to_add = []
|
175
|
+
if nterm_diff != 0.0
|
176
|
+
to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
|
177
|
+
end
|
178
|
+
if cterm_diff != 0.0
|
179
|
+
to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
|
180
|
+
end
|
181
|
+
|
182
|
+
variable_terminal_mods = to_add.map do |term, mssdiff, symb|
|
183
|
+
hash = {
|
184
|
+
:terminus => term,
|
185
|
+
:massdiff => mssdiff,
|
186
|
+
:variable => 'Y',
|
187
|
+
:symbol => symb,
|
188
|
+
}
|
189
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
190
|
+
end
|
191
|
+
|
192
|
+
#########################
|
193
|
+
# COLLECT THEM
|
194
|
+
#########################
|
195
|
+
@aa_mods = static_mods + variable_mods
|
196
|
+
@term_mods = static_terminal_mods + variable_terminal_mods
|
197
|
+
end
|
198
|
+
|
199
|
+
# takes a peptide sequence with modifications but no preceding or trailing
|
200
|
+
# amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
|
201
|
+
# returns a ModificationInfo object
|
202
|
+
# if there are no modifications, returns nil
|
203
|
+
def modification_info(mod_peptide)
|
204
|
+
return nil if @aa_mod_to_tot_mass.size == 0
|
205
|
+
mod_info = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
|
206
|
+
mass_table = @params.mass_index(:precursor)
|
207
|
+
|
208
|
+
# TERMINI:
|
209
|
+
## only the termini can match a single char
|
210
|
+
if @aa_mod_to_tot_mass.key? mod_peptide[0,1]
|
211
|
+
# AA + H + differential_mod
|
212
|
+
mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[mod_peptide[0,1]]
|
213
|
+
mod_peptide = mod_peptide[1...(mod_peptide.size)]
|
214
|
+
end
|
215
|
+
if @aa_mod_to_tot_mass.key? mod_peptide[(mod_peptide.size-1),1]
|
216
|
+
# AA + OH + differential_mod
|
217
|
+
mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[mod_peptide[-1,1]]
|
218
|
+
mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
|
219
|
+
end
|
220
|
+
|
221
|
+
# OTHER DIFFERENTIAL MODS:
|
222
|
+
mod_array = []
|
223
|
+
mod_cnt = 1
|
224
|
+
bare_cnt = 1
|
225
|
+
last_normal_aa = mod_peptide[0,1]
|
226
|
+
(1...mod_peptide.size).each do |i|
|
227
|
+
if @aa_mod_to_tot_mass.key?( last_normal_aa + mod_peptide[i,1] )
|
228
|
+
# we don't save the result because most amino acids will not be
|
229
|
+
# modified
|
230
|
+
mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(bare_cnt, @aa_mod_to_tot_mass[last_normal_aa + mod_peptide[i,1]])
|
231
|
+
else
|
232
|
+
last_normal_aa = mod_peptide[i,1]
|
233
|
+
bare_cnt += 1
|
234
|
+
end
|
235
|
+
mod_cnt += 1
|
236
|
+
end
|
237
|
+
if mod_cnt == bare_cnt
|
238
|
+
nil
|
239
|
+
else
|
240
|
+
mod_info.mod_aminoacid_masses = mod_array if mod_array.size > 0
|
241
|
+
mod_info
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
|
246
|
+
end
|
247
|
+
|