ms-sequest 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +26 -10
- data/Gemfile +4 -1
- data/Gemfile.lock +17 -2
- data/VERSION +1 -1
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +1 -1
- data/lib/ms/sequest/bioworks.rb +2 -2
- data/lib/ms/sequest/params.rb +0 -20
- data/lib/ms/sequest/pepxml.rb +7 -245
- data/lib/ms/sequest/pepxml/modifications.rb +247 -0
- data/lib/ms/sequest/pepxml/params.rb +32 -0
- data/lib/ms/sequest/sqt.rb +17 -17
- data/lib/ms/sequest/srf.rb +64 -54
- data/lib/ms/sequest/srf/pepxml.rb +316 -0
- data/lib/ms/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/ms/sequest/srf/sqt.rb +1 -1
- data/spec/ms/sequest/bioworks_spec.rb +11 -11
- data/spec/ms/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/ms/sequest/pepxml_spec.rb +0 -65
- data/spec/ms/sequest/srf/pepxml_spec.rb +84 -0
- data/spec/ms/sequest/srf_spec.rb +3 -3
- data/spec/ms/sequest/srf_spec_helper.rb +2 -2
- data/spec/spec_helper.rb +17 -18
- metadata +73 -19
data/.autotest
CHANGED
@@ -1,14 +1,30 @@
|
|
1
1
|
# -*- ruby -*-
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require 'rubygems'
|
4
|
+
require 'spec/more'
|
5
|
+
require 'autotest/bacon'
|
6
|
+
#require 'redgreen/autotest'
|
6
7
|
|
7
|
-
Autotest
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
}
|
8
|
+
class Autotest::Bacon < Autotest
|
9
|
+
undef make_test_cmd
|
10
|
+
def make_test_cmd(files_to_test)
|
11
|
+
args = files_to_test.keys.flatten.join(' ')
|
12
|
+
args = '-a' if args.empty?
|
13
|
+
# TODO : make regex to pass to -n using values
|
14
|
+
"#{ruby} -S bacon -I#{libs} -o TestUnit #{args}"
|
15
|
+
end
|
14
16
|
end
|
17
|
+
|
18
|
+
|
19
|
+
#Autotest.add_hook :initialize do |at|
|
20
|
+
# at.clear_mappings
|
21
|
+
#end
|
22
|
+
|
23
|
+
#Autotest.add_hook :initialize do |at|
|
24
|
+
#at.add_mapping(%r%^lib/(.*)\.rb$%) { |_, m|
|
25
|
+
# #["spec/#{m[1]}_spec.rb"]
|
26
|
+
# #["test/#{m[1]}_test.rb"]
|
27
|
+
# ## for both specs and tests:
|
28
|
+
# ["spec/#{m[1]}_spec.rb"]
|
29
|
+
#}
|
30
|
+
#end
|
data/Gemfile
CHANGED
@@ -2,8 +2,11 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies required to use your gem here.
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
|
-
gem "ms-
|
5
|
+
gem "ms-ident", ">= 0.0.17"
|
6
|
+
gem "ms-core", ">= 0.0.14"
|
6
7
|
gem "arrayclass", ">= 0.1.0"
|
8
|
+
gem "ms-msrun", ">= 0.3.3"
|
9
|
+
gem "trollop", "~> 1.16"
|
7
10
|
|
8
11
|
# Add dependencies to develop your gem here.
|
9
12
|
# Include everything needed to run rake, tests, features, etc.
|
data/Gemfile.lock
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
andand (1.3.1)
|
4
5
|
arrayclass (0.1.1)
|
5
6
|
bacon (1.1.0)
|
6
7
|
bio (1.4.1)
|
@@ -9,13 +10,24 @@ GEM
|
|
9
10
|
bundler (~> 1.0.0)
|
10
11
|
git (>= 1.2.5)
|
11
12
|
rake
|
12
|
-
ms-core (0.0.
|
13
|
+
ms-core (0.0.14)
|
13
14
|
bio (>= 1.4.1)
|
15
|
+
ms-ident (0.0.17)
|
16
|
+
andand
|
17
|
+
ms-core (>= 0.0.12)
|
18
|
+
nokogiri
|
19
|
+
ms-msrun (0.3.3)
|
20
|
+
ms-core (>= 0.0.3)
|
21
|
+
narray
|
22
|
+
nokogiri
|
14
23
|
ms-testdata (0.1.1)
|
24
|
+
narray (0.5.9.9)
|
25
|
+
nokogiri (1.4.4)
|
15
26
|
rake (0.8.7)
|
16
27
|
rcov (0.9.9)
|
17
28
|
spec-more (0.0.4)
|
18
29
|
bacon
|
30
|
+
trollop (1.16.2)
|
19
31
|
|
20
32
|
PLATFORMS
|
21
33
|
ruby
|
@@ -24,7 +36,10 @@ DEPENDENCIES
|
|
24
36
|
arrayclass (>= 0.1.0)
|
25
37
|
bundler (~> 1.0.0)
|
26
38
|
jeweler (~> 1.5.2)
|
27
|
-
ms-core (>= 0.0.
|
39
|
+
ms-core (>= 0.0.14)
|
40
|
+
ms-ident (>= 0.0.17)
|
41
|
+
ms-msrun (>= 0.3.3)
|
28
42
|
ms-testdata (>= 0.1.1)
|
29
43
|
rcov
|
30
44
|
spec-more
|
45
|
+
trollop (~> 1.16)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.18
|
data/bin/srf_to_search.rb
CHANGED
data/lib/ms/sequest/bioworks.rb
CHANGED
@@ -27,7 +27,7 @@ class Bioworks
|
|
27
27
|
@@origfilepath_re = /<origfilepath>(.*)<\/origfilepath>/o
|
28
28
|
|
29
29
|
|
30
|
-
attr_accessor :peps, :
|
30
|
+
attr_accessor :peps, :proteins, :version, :global_filename, :origfilename, :origfilepath
|
31
31
|
# a string of modifications e.g., "(M* +15.99491) (S@ +14.9322) "
|
32
32
|
attr_accessor :modifications
|
33
33
|
|
@@ -42,7 +42,7 @@ class Bioworks
|
|
42
42
|
|
43
43
|
# returns the number of prots. Raises an Exception if open and closing xml
|
44
44
|
# tags don't agree
|
45
|
-
def
|
45
|
+
def num_proteins(file)
|
46
46
|
re = /(<protein>)|(<\/protein>)/mo
|
47
47
|
begin_tags = 0
|
48
48
|
end_tags = 0
|
data/lib/ms/sequest/params.rb
CHANGED
@@ -269,26 +269,6 @@ class Ms::Sequest::Params
|
|
269
269
|
return "1"
|
270
270
|
end
|
271
271
|
|
272
|
-
## returns a SampleEnzyme object
|
273
|
-
#def sample_enzyme
|
274
|
-
# (offset, cleave_at, except_if_after) = enzyme_specificity.map do |v|
|
275
|
-
# if v == '' ; nil ; else v end
|
276
|
-
# end
|
277
|
-
# SampleEnzyme.new do |se|
|
278
|
-
# se.name = self.enzyme
|
279
|
-
# se.cut = cleave_at
|
280
|
-
# se.no_cut = except_if_after
|
281
|
-
# se.sense =
|
282
|
-
# if se.name == "No_Enzyme"
|
283
|
-
# nil
|
284
|
-
# elsif offset == 1
|
285
|
-
# 'C'
|
286
|
-
# elsif offset == 0
|
287
|
-
# 'N'
|
288
|
-
# end
|
289
|
-
# end
|
290
|
-
#end
|
291
|
-
|
292
272
|
# returns the enzyme name (but no parentheses connected with the name).
|
293
273
|
# this will likely be capitalized.
|
294
274
|
def enzyme
|
data/lib/ms/sequest/pepxml.rb
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
require 'ms/sequest/pepxml/params'
|
2
|
+
require 'ms/sequest/pepxml/modifications'
|
3
|
+
|
4
|
+
|
5
|
+
=begin
|
1
6
|
|
2
7
|
require 'sample_enzyme'
|
3
8
|
require 'ms/parser/mzxml'
|
@@ -861,251 +866,6 @@ class Sequest::PepXML::Parameters
|
|
861
866
|
end
|
862
867
|
end
|
863
868
|
|
864
|
-
class Sequest::PepXML::Modifications
|
865
|
-
include SpecIDXML
|
866
|
-
|
867
|
-
# sequest params object
|
868
|
-
attr_accessor :params
|
869
|
-
# array holding AAModifications
|
870
|
-
attr_accessor :aa_mods
|
871
|
-
# array holding TerminalModifications
|
872
|
-
attr_accessor :term_mods
|
873
|
-
# a hash of all differential modifications present by aa_one_letter_symbol
|
874
|
-
# and special_symbol. This is NOT the mass difference but the total mass {
|
875
|
-
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
876
|
-
# the amino acid sequence, they are give the *differential* mass. The
|
877
|
-
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
878
|
-
# cterminus ']' => 14.55 # nterminus
|
879
|
-
attr_accessor :masses_by_diff_mod_hash
|
880
|
-
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
881
|
-
# values are the special_symbols
|
882
|
-
attr_accessor :mod_symbols_hash
|
883
|
-
|
884
|
-
# The modification symbols string looks like this:
|
885
|
-
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
886
|
-
# ct is cterminal peptide (differential)
|
887
|
-
# nt is nterminal peptide (differential)
|
888
|
-
# the C is just cysteine
|
889
|
-
# will set_modifications and masses_by_diff_mod hash
|
890
|
-
def initialize(params=nil, modification_symbols_string='')
|
891
|
-
@params = params
|
892
|
-
if @params
|
893
|
-
set_modifications(params, modification_symbols_string)
|
894
|
-
end
|
895
|
-
end
|
896
|
-
|
897
|
-
# set the masses_by_diff_mod and mod_symbols_hash from
|
898
|
-
def set_hashes(modification_symbols_string)
|
899
|
-
|
900
|
-
@mod_symbols_hash = {}
|
901
|
-
@masses_by_diff_mod = {}
|
902
|
-
if (modification_symbols_string == nil || modification_symbols_string == '')
|
903
|
-
return nil
|
904
|
-
end
|
905
|
-
table = @params.mass_table
|
906
|
-
modification_symbols_string.split(/\)\s+\(/).each do |mod|
|
907
|
-
if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
|
908
|
-
if $1 == 'ct' || $1 == 'nt'
|
909
|
-
mass_diff = $3.to_f
|
910
|
-
@masses_by_diff_mod[$2] = mass_diff
|
911
|
-
@mod_symbols_hash[[$1.to_sym, mass_diff]] = $2.dup
|
912
|
-
# changed from below to match tests, is this right?
|
913
|
-
# @mod_symbols_hash[[$1, mass_diff]] = $2.dup
|
914
|
-
else
|
915
|
-
symbol_string = $2.dup
|
916
|
-
mass_diff = $3.to_f
|
917
|
-
$1.split('').each do |aa|
|
918
|
-
aa_as_sym = aa.to_sym
|
919
|
-
@masses_by_diff_mod[aa+symbol_string] = mass_diff + table[aa_as_sym]
|
920
|
-
@mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
|
921
|
-
end
|
922
|
-
end
|
923
|
-
end
|
924
|
-
end
|
925
|
-
end
|
926
|
-
|
927
|
-
# given a bare peptide (no end pieces) returns a ModificationInfo object
|
928
|
-
# e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
|
929
|
-
# if there are no modifications, returns nil
|
930
|
-
def modification_info(peptide)
|
931
|
-
if @masses_by_diff_mod.size == 0
|
932
|
-
return nil
|
933
|
-
end
|
934
|
-
hash = {}
|
935
|
-
hash[:modified_peptide] = peptide.dup
|
936
|
-
hsh = @masses_by_diff_mod
|
937
|
-
table = @params.mass_table
|
938
|
-
h = table[:h] # this? or h_plus ??
|
939
|
-
oh = table[:o] + h
|
940
|
-
## only the termini can match a single char
|
941
|
-
if hsh.key? peptide[0,1]
|
942
|
-
# AA + H + differential_mod
|
943
|
-
hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
|
944
|
-
peptide = peptide[1...(peptide.size)]
|
945
|
-
end
|
946
|
-
if hsh.key? peptide[(peptide.size-1),1]
|
947
|
-
# AA + OH + differential_mod
|
948
|
-
hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
|
949
|
-
peptide.slice!( 0..-2 )
|
950
|
-
peptide = peptide[0...(peptide.size-1)]
|
951
|
-
end
|
952
|
-
mod_array = []
|
953
|
-
(0...peptide.size).each do |i|
|
954
|
-
if hsh.key? peptide[i,2]
|
955
|
-
mod_array << Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass.new([ i+1 , hsh[peptide[i,2]] ])
|
956
|
-
end
|
957
|
-
end
|
958
|
-
if mod_array.size > 0
|
959
|
-
hash[:mod_aminoacid_masses] = mod_array
|
960
|
-
end
|
961
|
-
if hash.size > 1 # if there is more than just the modified peptide there
|
962
|
-
Sequest::PepXML::SearchHit::ModificationInfo.new(hash)
|
963
|
-
#Sequest::PepXML::SearchHit::ModificationInfo.new(hash.values_at(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
|
964
|
-
else
|
965
|
-
nil
|
966
|
-
end
|
967
|
-
end
|
968
|
-
|
969
|
-
# returns an array of static mod objects and static terminal mod objects
|
970
|
-
def create_static_mods(params)
|
971
|
-
|
972
|
-
####################################
|
973
|
-
## static mods
|
974
|
-
####################################
|
975
|
-
|
976
|
-
static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
|
977
|
-
static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
|
978
|
-
|
979
|
-
params.mods.each do |k,v|
|
980
|
-
v_to_f = v.to_f
|
981
|
-
if v_to_f != 0.0
|
982
|
-
if k =~ /add_(\w)_/
|
983
|
-
static_mods << [$1.to_sym, v_to_f]
|
984
|
-
else
|
985
|
-
static_terminal_mods << [k, v_to_f]
|
986
|
-
end
|
987
|
-
end
|
988
|
-
end
|
989
|
-
aa_hash = params.mass_table
|
990
|
-
|
991
|
-
## Create the static_mods objects
|
992
|
-
static_mods.map! do |mod|
|
993
|
-
hash = {
|
994
|
-
:aminoacid => mod[0].to_s,
|
995
|
-
:massdiff => mod[1],
|
996
|
-
:mass => aa_hash[mod[0]] + mod[1],
|
997
|
-
:variable => 'N',
|
998
|
-
:binary => 'Y',
|
999
|
-
}
|
1000
|
-
Sequest::PepXML::AAModification.new(hash)
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
## Create the static_terminal_mods objects
|
1004
|
-
static_terminal_mods.map! do |mod|
|
1005
|
-
terminus = if mod[0] =~ /Cterm/ ; 'c'
|
1006
|
-
else ; 'n' # only two possible termini
|
1007
|
-
end
|
1008
|
-
protein_terminus = case mod[0]
|
1009
|
-
when /Nterm_protein/ ; 'n'
|
1010
|
-
when /Cterm_protein/ ; 'c'
|
1011
|
-
else nil
|
1012
|
-
end
|
1013
|
-
|
1014
|
-
# create the hash
|
1015
|
-
hash = {
|
1016
|
-
:terminus => terminus,
|
1017
|
-
:massdiff => mod[1],
|
1018
|
-
:variable => 'N',
|
1019
|
-
:description => mod[0],
|
1020
|
-
}
|
1021
|
-
hash[:protein_terminus] = protein_terminus if protein_terminus
|
1022
|
-
Sequest::PepXML::TerminalModification.new(hash)
|
1023
|
-
end
|
1024
|
-
[static_mods, static_terminal_mods]
|
1025
|
-
end
|
1026
|
-
|
1027
|
-
# 1. sets aa_mods and term_mods from a sequest params object
|
1028
|
-
# 2. sets @params
|
1029
|
-
# 3. sets @masses_by_diff_mod
|
1030
|
-
def set_modifications(params, modification_symbols_string)
|
1031
|
-
@params = params
|
1032
|
-
|
1033
|
-
set_hashes(modification_symbols_string)
|
1034
|
-
(static_mods, static_terminal_mods) = create_static_mods(params)
|
1035
|
-
|
1036
|
-
aa_hash = params.mass_table
|
1037
|
-
#################################
|
1038
|
-
# Variable Mods:
|
1039
|
-
#################################
|
1040
|
-
arr = params.diff_search_options.rstrip.split(/\s+/)
|
1041
|
-
# [aa.to_sym, diff.to_f]
|
1042
|
-
variable_mods = []
|
1043
|
-
(0...arr.size).step(2) do |i|
|
1044
|
-
if arr[i].to_f != 0.0
|
1045
|
-
variable_mods << [arr[i+1], arr[i].to_f]
|
1046
|
-
end
|
1047
|
-
end
|
1048
|
-
mod_objects = []
|
1049
|
-
variable_mods.each do |mod|
|
1050
|
-
mod[0].split('').each do |aa|
|
1051
|
-
hash = {
|
1052
|
-
|
1053
|
-
:aminoacid => aa,
|
1054
|
-
:massdiff => mod[1],
|
1055
|
-
:mass => aa_hash[aa.to_sym] + mod[1],
|
1056
|
-
:variable => 'Y',
|
1057
|
-
:binary => 'N',
|
1058
|
-
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
1059
|
-
}
|
1060
|
-
mod_objects << Sequest::PepXML::AAModification.new(hash)
|
1061
|
-
end
|
1062
|
-
end
|
1063
|
-
variable_mods = mod_objects
|
1064
|
-
#################################
|
1065
|
-
# TERMINAL Variable Mods:
|
1066
|
-
#################################
|
1067
|
-
# These are always peptide, not protein termini (for sequest)
|
1068
|
-
(nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
|
1069
|
-
|
1070
|
-
to_add = []
|
1071
|
-
if nterm_diff != 0.0
|
1072
|
-
to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
|
1073
|
-
end
|
1074
|
-
if cterm_diff != 0.0
|
1075
|
-
to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
|
1076
|
-
end
|
1077
|
-
|
1078
|
-
variable_terminal_mods = to_add.map do |term, mssdiff, symb|
|
1079
|
-
hash = {
|
1080
|
-
:terminus => term,
|
1081
|
-
:massdiff => mssdiff,
|
1082
|
-
:variable => 'Y',
|
1083
|
-
:symbol => symb,
|
1084
|
-
}
|
1085
|
-
Sequest::PepXML::TerminalModification.new(hash)
|
1086
|
-
end
|
1087
|
-
|
1088
|
-
#########################
|
1089
|
-
# COLLECT THEM
|
1090
|
-
#########################
|
1091
|
-
@aa_mods = static_mods + variable_mods
|
1092
|
-
@term_mods = static_terminal_mods + variable_terminal_mods
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
## Generates the pepxml for static and differential amino acid mods based on
|
1096
|
-
## sequest object
|
1097
|
-
def to_pepxml
|
1098
|
-
st = ''
|
1099
|
-
if @aa_mods
|
1100
|
-
st << @aa_mods.map {|v| v.to_pepxml }.join
|
1101
|
-
end
|
1102
|
-
if @term_mods
|
1103
|
-
st << @term_mods.map {|v| v.to_pepxml }.join
|
1104
|
-
end
|
1105
|
-
st
|
1106
|
-
end
|
1107
|
-
|
1108
|
-
end
|
1109
869
|
|
1110
870
|
# Modified aminoacid, static or variable
|
1111
871
|
# unless otherwise stated, all attributes can be anything
|
@@ -1456,3 +1216,5 @@ class Sequest::PepXML::SearchHit::ModificationInfo
|
|
1456
1216
|
end
|
1457
1217
|
|
1458
1218
|
Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = Arrayclass.new(%w(position mass))
|
1219
|
+
|
1220
|
+
=end
|
@@ -0,0 +1,247 @@
|
|
1
|
+
require 'ms/ident/pepxml/search_hit/modification_info'
|
2
|
+
|
3
|
+
module Ms ; end
|
4
|
+
module Ms::Sequest ; end
|
5
|
+
class Ms::Sequest::Pepxml ; end
|
6
|
+
|
7
|
+
class Ms::Sequest::Pepxml::Modifications
|
8
|
+
# sequest params object
|
9
|
+
attr_accessor :params
|
10
|
+
# array holding AAModifications
|
11
|
+
attr_accessor :aa_mods
|
12
|
+
# array holding TerminalModifications
|
13
|
+
attr_accessor :term_mods
|
14
|
+
# a hash of all differential modifications present by aa_one_letter_symbol
|
15
|
+
# and special_symbol. This is NOT the mass difference but the total mass {
|
16
|
+
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
17
|
+
# the amino acid sequence, they are given the *differential* mass. The
|
18
|
+
# termini are given the special symbol as in sequest, e.g. '[' => 12.22 (cterminus),
|
19
|
+
# ']' => 14.55 (nterminus)
|
20
|
+
attr_accessor :aa_mod_to_tot_mass
|
21
|
+
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
22
|
+
# values are the special_symbols
|
23
|
+
attr_accessor :mod_symbols_hash
|
24
|
+
|
25
|
+
# returns an array of all modifications (aa_mods, then term_mods)
|
26
|
+
def modifications
|
27
|
+
aa_mods + term_mods
|
28
|
+
end
|
29
|
+
|
30
|
+
# The modification symbols string looks like this:
|
31
|
+
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
32
|
+
# ct is cterminal peptide (differential)
|
33
|
+
# nt is nterminal peptide (differential)
|
34
|
+
# the C is just cysteine
|
35
|
+
# if params is given, calls set_modifications (which also fills the aa_mod_to_tot_mass hash)
|
36
|
+
def initialize(params=nil, modification_symbols_string='')
|
37
|
+
@params = params
|
38
|
+
if @params
|
39
|
+
set_modifications(params, modification_symbols_string)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# set the aa_mod_to_tot_mass and mod_symbols_hash from the modification symbols string
|
44
|
+
def set_hashes(modification_symbols_string)
|
45
|
+
|
46
|
+
@mod_symbols_hash = {}
|
47
|
+
@aa_mod_to_tot_mass = {}
|
48
|
+
if (modification_symbols_string == nil || modification_symbols_string == '')
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
table = @params.mass_index(:precursor)
|
52
|
+
modification_symbols_string.split(/\)\s+\(/).each do |mod|
|
53
|
+
if mod =~ /\(?(\w+)(.) (.[\d\.]+)\)?/
|
54
|
+
if $1 == 'ct' || $1 == 'nt'
|
55
|
+
mass_diff = $3.to_f
|
56
|
+
@aa_mod_to_tot_mass[$2] = mass_diff
|
57
|
+
@mod_symbols_hash[[$1.to_sym, mass_diff]] = $2.dup
|
58
|
+
# changed from below to match tests, is this right?
|
59
|
+
# @mod_symbols_hash[[$1, mass_diff]] = $2.dup
|
60
|
+
else
|
61
|
+
symbol_string = $2.dup
|
62
|
+
mass_diff = $3.to_f
|
63
|
+
$1.split('').each do |aa|
|
64
|
+
aa_as_sym = aa.to_sym
|
65
|
+
@aa_mod_to_tot_mass[aa+symbol_string] = mass_diff + table[aa_as_sym]
|
66
|
+
@mod_symbols_hash[[aa_as_sym, mass_diff]] = symbol_string
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
# returns an array of static mod objects and static terminal mod objects
|
73
|
+
def create_static_mods(params)
|
74
|
+
|
75
|
+
####################################
|
76
|
+
## static mods
|
77
|
+
####################################
|
78
|
+
|
79
|
+
static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
|
80
|
+
static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]
|
81
|
+
|
82
|
+
params.mods.each do |k,v|
|
83
|
+
v_to_f = v.to_f
|
84
|
+
if v_to_f != 0.0
|
85
|
+
if k =~ /add_(\w)_/
|
86
|
+
static_mods << [$1.to_sym, v_to_f]
|
87
|
+
else
|
88
|
+
static_terminal_mods << [k, v_to_f]
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
aa_hash = params.mass_index(:precursor)
|
93
|
+
|
94
|
+
## Create the static_mods objects
|
95
|
+
static_mods.map! do |mod|
|
96
|
+
hash = {
|
97
|
+
:aminoacid => mod[0].to_s,
|
98
|
+
:massdiff => mod[1],
|
99
|
+
:mass => aa_hash[mod[0]] + mod[1],
|
100
|
+
:variable => 'N',
|
101
|
+
:binary => 'Y',
|
102
|
+
}
|
103
|
+
Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
104
|
+
end
|
105
|
+
|
106
|
+
## Create the static_terminal_mods objects
|
107
|
+
static_terminal_mods.map! do |mod|
|
108
|
+
terminus = if mod[0] =~ /Cterm/ ; 'c'
|
109
|
+
else ; 'n' # only two possible termini
|
110
|
+
end
|
111
|
+
protein_terminus = case mod[0]
|
112
|
+
when /Nterm_protein/ ; 'n'
|
113
|
+
when /Cterm_protein/ ; 'c'
|
114
|
+
else nil
|
115
|
+
end
|
116
|
+
|
117
|
+
# create the hash
|
118
|
+
hash = {
|
119
|
+
:terminus => terminus,
|
120
|
+
:massdiff => mod[1],
|
121
|
+
:variable => 'N',
|
122
|
+
:description => mod[0],
|
123
|
+
}
|
124
|
+
hash[:protein_terminus] = protein_terminus if protein_terminus
|
125
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
126
|
+
end
|
127
|
+
[static_mods, static_terminal_mods]
|
128
|
+
end
|
129
|
+
|
130
|
+
# 1. sets aa_mods and term_mods from a sequest params object
|
131
|
+
# 2. sets @params
|
132
|
+
# 3. sets @aa_mod_to_tot_mass
|
133
|
+
def set_modifications(params, modification_symbols_string)
|
134
|
+
@params = params
|
135
|
+
|
136
|
+
set_hashes(modification_symbols_string)
|
137
|
+
(static_mods, static_terminal_mods) = create_static_mods(params)
|
138
|
+
|
139
|
+
aa_hash = params.mass_index(:precursor)
|
140
|
+
#################################
|
141
|
+
# Variable Mods:
|
142
|
+
#################################
|
143
|
+
arr = params.diff_search_options.rstrip.split(/\s+/)
|
144
|
+
# [aa.to_sym, diff.to_f]
|
145
|
+
variable_mods = []
|
146
|
+
(0...arr.size).step(2) do |i|
|
147
|
+
if arr[i].to_f != 0.0
|
148
|
+
variable_mods << [arr[i+1], arr[i].to_f]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
mod_objects = []
|
152
|
+
variable_mods.each do |mod|
|
153
|
+
mod[0].split('').each do |aa|
|
154
|
+
hash = {
|
155
|
+
|
156
|
+
:aminoacid => aa,
|
157
|
+
:massdiff => mod[1],
|
158
|
+
:mass => aa_hash[aa.to_sym] + mod[1],
|
159
|
+
:variable => 'Y',
|
160
|
+
:binary => 'N',
|
161
|
+
:symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
|
162
|
+
}
|
163
|
+
mod_objects << Ms::Ident::Pepxml::AminoacidModification.new(hash)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
variable_mods = mod_objects
|
168
|
+
#################################
|
169
|
+
# TERMINAL Variable Mods:
|
170
|
+
#################################
|
171
|
+
# These are always peptide, not protein termini (for sequest)
|
172
|
+
(nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }
|
173
|
+
|
174
|
+
to_add = []
|
175
|
+
if nterm_diff != 0.0
|
176
|
+
to_add << ['n',nterm_diff.to_plus_minus_string, @mod_symbols_hash[:nt, nterm_diff]]
|
177
|
+
end
|
178
|
+
if cterm_diff != 0.0
|
179
|
+
to_add << ['c', cterm_diff.to_plus_minus_string, @mod_symbols_hash[:ct, cterm_diff]]
|
180
|
+
end
|
181
|
+
|
182
|
+
variable_terminal_mods = to_add.map do |term, mssdiff, symb|
|
183
|
+
hash = {
|
184
|
+
:terminus => term,
|
185
|
+
:massdiff => mssdiff,
|
186
|
+
:variable => 'Y',
|
187
|
+
:symbol => symb,
|
188
|
+
}
|
189
|
+
Ms::Ident::Pepxml::TerminalModification.new(hash)
|
190
|
+
end
|
191
|
+
|
192
|
+
#########################
|
193
|
+
# COLLECT THEM
|
194
|
+
#########################
|
195
|
+
@aa_mods = static_mods + variable_mods
|
196
|
+
@term_mods = static_terminal_mods + variable_terminal_mods
|
197
|
+
end
|
198
|
+
|
199
|
+
# takes a peptide sequence with modifications but no preceding or trailing
|
200
|
+
# amino acids. (e.g. expects "]PEPT*IDE" but not 'K.PEPTIDE.R')
|
201
|
+
# returns a ModificationInfo object
|
202
|
+
# if there are no modifications, returns nil
|
203
|
+
def modification_info(mod_peptide)
|
204
|
+
return nil if @aa_mod_to_tot_mass.size == 0
|
205
|
+
mod_info = Ms::Ident::Pepxml::SearchHit::ModificationInfo.new( mod_peptide.dup )
|
206
|
+
mass_table = @params.mass_index(:precursor)
|
207
|
+
|
208
|
+
# TERMINI:
|
209
|
+
## only the termini can match a single char
|
210
|
+
if @aa_mod_to_tot_mass.key? mod_peptide[0,1]
|
211
|
+
# AA + H + differential_mod
|
212
|
+
mod_info.mod_nterm_mass = mass_table[mod_peptide[1,1].to_sym] + mass_table['h+'] + @aa_mod_to_tot_mass[mod_peptide[0,1]]
|
213
|
+
mod_peptide = mod_peptide[1...(mod_peptide.size)]
|
214
|
+
end
|
215
|
+
if @aa_mod_to_tot_mass.key? mod_peptide[(mod_peptide.size-1),1]
|
216
|
+
# AA + OH + differential_mod
|
217
|
+
mod_info.mod_cterm_mass = mass_table[mod_peptide[(mod_peptide.size-2),1].to_sym] + mass_table['oh'] + @aa_mod_to_tot_mass[mod_peptide[-1,1]]
|
218
|
+
mod_peptide = mod_peptide[0...(mod_peptide.size-1)]
|
219
|
+
end
|
220
|
+
|
221
|
+
# OTHER DIFFERENTIAL MODS:
|
222
|
+
mod_array = []
|
223
|
+
mod_cnt = 1
|
224
|
+
bare_cnt = 1
|
225
|
+
last_normal_aa = mod_peptide[0,1]
|
226
|
+
(1...mod_peptide.size).each do |i|
|
227
|
+
if @aa_mod_to_tot_mass.key?( last_normal_aa + mod_peptide[i,1] )
|
228
|
+
# we don't save the result because most amino acids will not be
|
229
|
+
# modified
|
230
|
+
mod_array << Ms::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(bare_cnt, @aa_mod_to_tot_mass[last_normal_aa + mod_peptide[i,1]])
|
231
|
+
else
|
232
|
+
last_normal_aa = mod_peptide[i,1]
|
233
|
+
bare_cnt += 1
|
234
|
+
end
|
235
|
+
mod_cnt += 1
|
236
|
+
end
|
237
|
+
if mod_cnt == bare_cnt
|
238
|
+
nil
|
239
|
+
else
|
240
|
+
mod_info.mod_aminoacid_masses = mod_array if mod_array.size > 0
|
241
|
+
mod_info
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
|
246
|
+
end
|
247
|
+
|