isoelectric_point 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +28 -15
- data/lib/isoelectric_point/aa.rb +84 -0
- data/lib/isoelectric_point/pka_data.rb +58 -0
- data/lib/isoelectric_point.rb +1 -1
- data/test/{sequence_test.rb → aa_test.rb} +42 -10
- metadata +11 -11
- data/lib/isoelectric_point/data.rb +0 -56
- data/lib/isoelectric_point/sequence.rb +0 -67
data/README.rdoc
CHANGED
@@ -1,26 +1,39 @@
|
|
1
|
-
|
2
|
-
A ruby class for calculating the isoelectric point of a protein. It's based on bioperl
|
1
|
+
= isoelectric_point
|
2
|
+
A ruby class for calculating the isoelectric point of a protein. It's based on bioperl.
|
3
|
+
It's currently being refactored to fit into bioruby.
|
3
4
|
|
4
|
-
|
5
|
+
== PKA Sets
|
5
6
|
The available pka sets are:
|
6
|
-
*
|
7
|
-
*
|
8
|
-
*
|
9
|
-
*
|
10
|
-
*
|
7
|
+
* dtaselect
|
8
|
+
* emboss
|
9
|
+
* rodwell
|
10
|
+
* wikipedia
|
11
|
+
* silerio
|
11
12
|
|
12
|
-
|
13
|
+
== Installation
|
13
14
|
gem install isoelectric_point
|
14
15
|
|
15
|
-
|
16
|
+
== Usage
|
16
17
|
require 'isoelectric_point'
|
17
18
|
# Either prefix or include namespace
|
18
|
-
include
|
19
|
-
|
20
|
-
sequence = Sequence.new("KKGFTCGELA")
|
19
|
+
include Bio::Sequence
|
20
|
+
aa = AA.new("KKGFTCGELA")
|
21
21
|
|
22
22
|
#what is the protein charge at ph 14?
|
23
|
-
charge =
|
23
|
+
charge = aa.calculate_charge_at(14)
|
24
24
|
|
25
25
|
#calculate the ph and round off to 7 decimal places
|
26
|
-
ph =
|
26
|
+
ph = aa.calculate_iep('dtaselect', 7)
|
27
|
+
|
28
|
+
# calculate the ph with a custom set
|
29
|
+
custom = { "N_TERMINUS" => 8.1,
|
30
|
+
"K" => 10.1,
|
31
|
+
"R" => 12.1,
|
32
|
+
"H" => 6.4,
|
33
|
+
"C_TERMINUS" => 3.15,
|
34
|
+
"D" => 4.34,
|
35
|
+
"E" => 4.33,
|
36
|
+
"C" => 8.33,
|
37
|
+
"Y" => 9.5
|
38
|
+
}
|
39
|
+
ph = aa.calculate_iep(custom, 7)
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Bio
  module Sequence
    # An amino-acid sequence that can report its net charge at a given pH and
    # estimate its isoelectric point.
    #
    # The sequence is the String value itself, upper-cased and with all
    # whitespace removed.
    #
    # NOTE: this class calls Float#round_to_places, which is not defined here.
    # It is presumably provided by the gem's extensions file - confirm that it
    # is loaded before this class is used.
    class AA < String
      # Residues counted as positive contributions to the net charge.
      POSITIVE_GROUPS = ['K', 'R', 'H'].freeze
      # Residues counted as negative contributions to the net charge.
      NEGATIVE_GROUPS = ['D', 'E', 'C', 'Y'].freeze
      # Every residue whose occurrences are counted.
      CHARGED_GROUPS = (POSITIVE_GROUPS + NEGATIVE_GROUPS).freeze

      # sequence: the amino-acid letters; case and whitespace are ignored.
      # Raises ArgumentError if the sequence is nil or blank.
      def initialize(sequence)
        raise ArgumentError, "sequence is required" if sequence.nil? || sequence.strip.empty?
        super(sequence.upcase.gsub(/\s/, ''))
      end

      # Calculate the Isoelectric Point
      # pka_name_or_set: the name of a PKA set or a custom PKA set
      # places: specify the number of decimal places the value should be rounded to.
      # loop_limit: how many iterations should be made to find the point. You should not need to tweak this.
      #
      # Raises ArgumentError for an unknown set name and RuntimeError when no
      # result is found within loop_limit iterations.
      def calculate_iep(pka_name_or_set = 'dtaselect', places = 2, loop_limit = 100)
        # Resolve the set once; calculate_charge_at accepts the resulting Hash.
        pka = select_pka(pka_name_or_set)
        ph = 7.5
        step = 3.5
        loops = 0
        loop do
          current_charge = calculate_charge_at(ph, pka)
          # Bisection: move towards the pH at which the charge changes sign.
          ph += current_charge > 0 ? step : -step
          step /= 2.0
          loops += 1
          # Test for convergence before enforcing the limit so that a result
          # found on the last permitted iteration is still returned.
          break if iep_reached?(current_charge)
          if loops >= loop_limit
            raise "Could not find a result within #{loop_limit} loops using #{pka_name_or_set.inspect}"
          end
        end
        ph.round_to_places(places)
      end

      # Calculate the charge of the sequence at a given ph
      # As a second argument you can pass the name of the PKA set or a custom PKA set
      def calculate_charge_at(ph, pka_name_or_set = 'dtaselect')
        pka = select_pka(pka_name_or_set)
        counts = charged_residue_frequencies
        positive = POSITIVE_GROUPS.inject(partial_charge(pka['N_TERMINUS'], ph)) do |sum, residue|
          sum + partial_charge(pka[residue], ph) * counts[residue]
        end
        negative = NEGATIVE_GROUPS.inject(partial_charge(ph, pka['C_TERMINUS'])) do |sum, residue|
          sum + partial_charge(ph, pka[residue]) * counts[residue]
        end
        positive - negative
      end

      private

      # True when the charge, rounded to 5 decimal places, equals zero.
      def iep_reached?(current_charge)
        !current_charge.nil? && current_charge.round_to_places(5) == 0.0.round_to_places(5)
      end

      # Memoised residue counts; computed on first use.
      def charged_residue_frequencies
        @charged_residue_frequency ||= calculate_charged_residue_frequencies
      end

      # Returns x / (x + 1) for x = 10 ** (a - b), as a Float.
      def partial_charge(a, b)
        x = 10.0 ** (a - b)
        x / (x + 1)
      end

      # Count the occurrences of the charged groups in the AA.
      # Returns a Hash where the key is the group and the value is the number of
      # occurrences in self.
      def calculate_charged_residue_frequencies
        CHARGED_GROUPS.inject(Hash.new(0)) do |frequencies, residue|
          frequencies[residue] = count(residue)
          frequencies
        end
      end

      # Select a PKA set according to the name or supply a custom set.
      # Raises ArgumentError if the name can not be mapped to a PKA set.
      # If the argument is a Hash it is assumed to be a custom set and is
      # returned unchanged; anything else is used as a key to look up the set.
      def select_pka(pka_name_or_set = 'dtaselect')
        return pka_name_or_set if pka_name_or_set.is_a?(Hash)

        set = PkaData::PKAS[pka_name_or_set]
        raise ArgumentError, "Set '#{pka_name_or_set}' is unknown. Please specify one of #{PkaData::PKAS.keys.join(', ')} or pass a custom set" unless set
        set
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Bio
  module Sequence
    # Named pKa tables used for charge and isoelectric-point calculations.
    #
    # Each set maps 'N_TERMINUS', 'C_TERMINUS' and the residue letters
    # K, R, H, D, E, C, Y to a pKa value. PKAS and every set in it are frozen.
    module PkaData
      PKAS = {
        'dtaselect' => { 'N_TERMINUS' => 8.0,
                         'K' => 10.0,
                         'R' => 12.0,
                         'H' => 6.5,
                         'C_TERMINUS' => 3.1,
                         'D' => 4.4,
                         'E' => 4.4,
                         'C' => 8.5,
                         'Y' => 10.0
                       }.freeze,
        # This set used to be an exact copy of 'dtaselect'. The values below
        # are the EMBOSS pK table.
        # NOTE(review): confirm against the Epk.dat shipped with EMBOSS.
        'emboss' => { 'N_TERMINUS' => 8.6,
                      'K' => 10.8,
                      'R' => 12.5,
                      'H' => 6.5,
                      'C_TERMINUS' => 3.6,
                      'D' => 3.9,
                      'E' => 4.1,
                      'C' => 8.5,
                      'Y' => 10.1
                    }.freeze,
        'rodwell' => { 'N_TERMINUS' => 8.0,
                       'K' => 11.5,
                       'R' => 11.5,
                       'H' => 6.0,
                       'C_TERMINUS' => 3.1,
                       'D' => 3.68,
                       'E' => 4.25,
                       'C' => 8.33,
                       'Y' => 10.07
                     }.freeze,
        'wikipedia' => { 'N_TERMINUS' => 8.2,
                         'K' => 10.54,
                         'R' => 12.48,
                         'H' => 6.04,
                         'C_TERMINUS' => 3.65,
                         'D' => 3.9,
                         'E' => 4.07,
                         'C' => 8.18,
                         'Y' => 10.47
                       }.freeze,
        # The key spelling is kept as-is because callers look sets up by it.
        'silerio' => { 'N_TERMINUS' => 8.2,
                       'K' => 10.4,
                       'R' => 12.0,
                       'H' => 6.4,
                       'C_TERMINUS' => 3.2,
                       'D' => 4.0,
                       'E' => 4.5,
                       'C' => 9.0,
                       'Y' => 10.0
                     }.freeze
      }.freeze
    end
  end
end
|
data/lib/isoelectric_point.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'test_helper'
|
2
|
-
include
|
3
|
-
class
|
2
|
+
include Bio::Sequence
|
3
|
+
class AATest < Test::Unit::TestCase
|
4
4
|
|
5
5
|
context 'some known sequences' do
|
6
6
|
setup do
|
@@ -23,10 +23,10 @@ class SequenceTest < Test::Unit::TestCase
|
|
23
23
|
MMS' => 8.30908203125
|
24
24
|
}
|
25
25
|
end
|
26
|
-
should 'calculate' do
|
26
|
+
should 'calculate iep' do
|
27
27
|
places = 2
|
28
28
|
@known.each do |sequence, expected|
|
29
|
-
actual =
|
29
|
+
actual = AA.new(sequence).calculate_iep('dtaselect', places)
|
30
30
|
assert_equal expected.round_to_places(places), actual, "Expected the iep to be #{expected} but was #{actual} for #{sequence}"
|
31
31
|
end
|
32
32
|
end
|
@@ -34,33 +34,65 @@ class SequenceTest < Test::Unit::TestCase
|
|
34
34
|
|
35
35
|
should "Raise if not sequence given" do
|
36
36
|
assert_raise ArgumentError do
|
37
|
-
|
37
|
+
AA.new(nil)
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
41
|
should "Raise if empty sequence given" do
|
42
42
|
assert_raise ArgumentError do
|
43
|
-
|
43
|
+
AA.new(' ')
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
47
|
|
48
48
|
should "Raise if unknown pks used" do
|
49
49
|
assert_raise ArgumentError do
|
50
|
-
|
50
|
+
AA.new('PG', 'youdontknowme')
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
context "a Sequence" do
|
55
55
|
setup do
|
56
|
-
@sequence =
|
56
|
+
@sequence = AA.new("PGAKAAAKKPKKAAG")
|
57
57
|
end
|
58
58
|
|
59
59
|
should "calculates the isolectric point to 0 places" do
|
60
|
-
assert_equal 11, @sequence.calculate_iep(0)
|
60
|
+
assert_equal 11, @sequence.calculate_iep('dtaselect', 0)
|
61
61
|
end
|
62
62
|
should "calculates the isolectric pointto 3 places" do
|
63
|
-
assert_equal 10.603, @sequence.calculate_iep(3)
|
63
|
+
assert_equal 10.603, @sequence.calculate_iep('dtaselect', 3)
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'use a custom pka set' do
|
67
|
+
setup do
|
68
|
+
@custom = { "N_TERMINUS" => 8.0,
|
69
|
+
"K" => 9.5, # changed from dta_select where it is 10.0
|
70
|
+
"R" => 12.0,
|
71
|
+
"H" => 6.5,
|
72
|
+
"C_TERMINUS" => 3.1,
|
73
|
+
"D" => 4.4,
|
74
|
+
"E" => 4.4,
|
75
|
+
"C" => 8.5,
|
76
|
+
"Y" => 10.1
|
77
|
+
}
|
78
|
+
end
|
79
|
+
should 'accept a custom pka set and use it for calculation' do
|
80
|
+
assert_equal 10.106, @sequence.calculate_iep(@custom, 3)
|
81
|
+
end
|
82
|
+
|
83
|
+
should 'raise when no result can be found due to a invalid set' do
|
84
|
+
@custom['K'] = 20
|
85
|
+
assert_raises RuntimeError do
|
86
|
+
@sequence.calculate_iep(@custom, 3)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
context 'use another pka set' do
|
91
|
+
should 'work with all provided sets without raising' do
|
92
|
+
Bio::Sequence::PkaData::PKAS.keys.each do |key|
|
93
|
+
@sequence.calculate_iep(key, 3, 25)
|
94
|
+
end
|
95
|
+
end
|
64
96
|
end
|
65
97
|
end
|
66
98
|
end
|
metadata
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isoelectric_point
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
|
-
-
|
13
|
+
- GeorgeG
|
14
14
|
- pascalbetz
|
15
15
|
autorequire:
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-10-
|
19
|
+
date: 2010-10-07 00:00:00 +02:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -60,14 +60,14 @@ files:
|
|
60
60
|
- LICENSE
|
61
61
|
- README.rdoc
|
62
62
|
- lib/isoelectric_point.rb
|
63
|
-
- lib/isoelectric_point/
|
63
|
+
- lib/isoelectric_point/pka_data.rb
|
64
64
|
- lib/isoelectric_point/extensions.rb
|
65
|
-
- lib/isoelectric_point/
|
66
|
-
- test/
|
65
|
+
- lib/isoelectric_point/aa.rb
|
66
|
+
- test/aa_test.rb
|
67
67
|
- test/extensions_test.rb
|
68
68
|
- test/test_helper.rb
|
69
69
|
has_rdoc: true
|
70
|
-
homepage: http://github.com/
|
70
|
+
homepage: http://github.com/GeorgeG/isoelectric_point_4_R
|
71
71
|
licenses: []
|
72
72
|
|
73
73
|
post_install_message:
|
@@ -99,8 +99,8 @@ rubyforge_project:
|
|
99
99
|
rubygems_version: 1.3.7
|
100
100
|
signing_key:
|
101
101
|
specification_version: 3
|
102
|
-
summary: Calculate isoelectric point.
|
102
|
+
summary: Calculate isoelectric point.
|
103
103
|
test_files:
|
104
|
-
- test/
|
104
|
+
- test/aa_test.rb
|
105
105
|
- test/extensions_test.rb
|
106
106
|
- test/test_helper.rb
|
@@ -1,56 +0,0 @@
|
|
1
|
-
module IsoelectricPoint
  # Named pKa tables used for charge and isoelectric-point calculations.
  #
  # Each set maps 'N_TERMINUS', 'C_TERMINUS' and the residue letters
  # K, R, H, D, E, C, Y to a pKa value. PKAS and every set in it are frozen.
  module Data
    PKAS = {
      'dtaselect' => { "N_TERMINUS" => 8.0,
                       "K" => 10.0,
                       "R" => 12.0,
                       "H" => 6.5,
                       "C_TERMINUS" => 3.1,
                       "D" => 4.4,
                       "E" => 4.4,
                       "C" => 8.5,
                       "Y" => 10.0
                     }.freeze,
      # This set used to be an exact copy of 'dtaselect'. The values below
      # are the EMBOSS pK table.
      # NOTE(review): confirm against the Epk.dat shipped with EMBOSS.
      'emboss' => { "N_TERMINUS" => 8.6,
                    "K" => 10.8,
                    "R" => 12.5,
                    "H" => 6.5,
                    "C_TERMINUS" => 3.6,
                    "D" => 3.9,
                    "E" => 4.1,
                    "C" => 8.5,
                    "Y" => 10.1
                  }.freeze,
      'rodwell' => { "N_TERMINUS" => 8.0,
                     "K" => 11.5,
                     "R" => 11.5,
                     "H" => 6.0,
                     "C_TERMINUS" => 3.1,
                     "D" => 3.68,
                     "E" => 4.25,
                     "C" => 8.33,
                     "Y" => 10.07
                   }.freeze,
      'wikipedia' => { "N_TERMINUS" => 8.2,
                       "K" => 10.54,
                       "R" => 12.48,
                       "H" => 6.04,
                       "C_TERMINUS" => 3.65,
                       "D" => 3.9,
                       "E" => 4.07,
                       "C" => 8.18,
                       "Y" => 10.47
                     }.freeze,
      # The key spelling is kept as-is because callers look sets up by it.
      'silerio' => { "N_TERMINUS" => 8.2,
                     "K" => 10.4,
                     "R" => 12.0,
                     "H" => 6.4,
                     "C_TERMINUS" => 3.2,
                     "D" => 4.0,
                     "E" => 4.5,
                     "C" => 9.0,
                     "Y" => 10.0
                   }.freeze
    }.freeze
  end
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# Calculates the isoelectric point of a given protein sequence.
module IsoelectricPoint
  # A protein sequence bound to one named pKa set.
  #
  # NOTE: this class calls Float#round_to_places and reads Data::PKAS, neither
  # of which is defined here. They are presumably provided by the gem's other
  # files - confirm that those are loaded first.
  class Sequence
    # Every residue whose occurrences are counted.
    CHARGED_GROUPS = %w{K R H D E C Y}.freeze
    # Residues counted as positive contributions to the net charge.
    KEYS_PLUS = ['K', 'R', 'H'].freeze
    # Residues counted as negative contributions to the net charge.
    KEYS_MINUS = ['D', 'E', 'C', 'Y'].freeze

    attr_reader :value
    attr_reader :pks

    # sequence: the amino-acid letters; case and whitespace are ignored.
    # pka_set_name: a key of Data::PKAS.
    # Raises ArgumentError if either argument is nil or blank, or if the set
    # name is unknown.
    def initialize(sequence, pka_set_name = 'dtaselect')
      raise ArgumentError, "pka_set_name is required" if pka_set_name.nil? || pka_set_name.strip == ''
      raise ArgumentError, "sequence is required" if sequence.nil? || sequence.strip == ''
      @pks = Data::PKAS[pka_set_name]
      @value = sequence.upcase.gsub(/\s/, '')
      raise ArgumentError, "pka_set '#{pka_set_name}' is unknown. Please specify one of #{Data::PKAS.keys.join(', ')}" unless pks
    end

    # Replace the sequence. The new value is stored as given, and the
    # memoised residue counts are dropped so later calculations use it.
    def value=(sequence)
      @charged_residue_frequency = nil
      @value = sequence
    end

    # Calculate the isoelectric point.
    # places: number of decimal places the result is rounded to.
    # loop_limit: maximum number of iterations before giving up.
    # Raises RuntimeError when no result is found within loop_limit
    # iterations; without the limit such a search never terminated.
    def calculate_iep(places = 2, loop_limit = 100)
      precision = 15
      ph = 7.5
      step = 3.5
      loops = 0
      loop do
        current_charge = calculate_charge_at(ph)
        # Bisection: move towards the pH at which the charge changes sign.
        ph += current_charge > 0 ? step : -step
        step /= 2.0
        loops += 1
        break if current_charge.round_to_places(precision) == 0.0.round_to_places(precision)
        raise "Could not find a result within #{loop_limit} loops" if loops >= loop_limit
      end
      ph.round_to_places(places)
    end

    # Calculate the charge of the sequence at the given ph using the pKa set
    # chosen at construction time.
    def calculate_charge_at(ph)
      counts = charged_residue_frequencies
      positive = KEYS_PLUS.inject(partial_charge(pks['N_TERMINUS'], ph)) do |sum, residue|
        sum + partial_charge(pks[residue], ph) * counts[residue]
      end
      negative = KEYS_MINUS.inject(partial_charge(ph, pks['C_TERMINUS'])) do |sum, residue|
        sum + partial_charge(ph, pks[residue]) * counts[residue]
      end
      positive - negative
    end

    private

    # Memoised residue counts; computed on first use and reset by value=.
    def charged_residue_frequencies
      @charged_residue_frequency ||= calculate_charged_residue_frequencies
    end

    # Returns x / (x + 1) for x = 10 ** (a - b), as a Float.
    def partial_charge(a, b)
      x = 10.0 ** (a - b)
      x / (x + 1)
    end

    # Returns a Hash mapping each charged group to its number of occurrences
    # in value.
    def calculate_charged_residue_frequencies
      CHARGED_GROUPS.inject(Hash.new(0)) do |frequencies, residue|
        frequencies[residue] = value.count(residue)
        frequencies
      end
    end
  end
end
|