isoelectric_point 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +28 -15
- data/lib/isoelectric_point/aa.rb +84 -0
- data/lib/isoelectric_point/pka_data.rb +58 -0
- data/lib/isoelectric_point.rb +1 -1
- data/test/{sequence_test.rb → aa_test.rb} +42 -10
- metadata +11 -11
- data/lib/isoelectric_point/data.rb +0 -56
- data/lib/isoelectric_point/sequence.rb +0 -67
data/README.rdoc
CHANGED
@@ -1,26 +1,39 @@
|
|
1
|
-
|
2
|
-
A ruby class for calculating the isoelectric point of a protein. It's based on bioperl
|
1
|
+
= isoelectric_point
|
2
|
+
A ruby class for calculating the isoelectric point of a protein. It's based on bioperl.
|
3
|
+
It's currently being refactored to fit into bioruby.
|
3
4
|
|
4
|
-
|
5
|
+
== PKA Sets
|
5
6
|
The available pka sets are:
|
6
|
-
*
|
7
|
-
*
|
8
|
-
*
|
9
|
-
*
|
10
|
-
*
|
7
|
+
* dtaselect
|
8
|
+
* emboss
|
9
|
+
* rodwell
|
10
|
+
* wikipedia
|
11
|
+
* silerio
|
11
12
|
|
12
|
-
|
13
|
+
== Installation
|
13
14
|
gem install isoelectric_point
|
14
15
|
|
15
|
-
|
16
|
+
== Usage
|
16
17
|
require 'isoelectric_point'
|
17
18
|
# Either prefix or include namespace
|
18
|
-
include
|
19
|
-
|
20
|
-
sequence = Sequence.new("KKGFTCGELA")
|
19
|
+
include Bio::Sequence
|
20
|
+
aa = AA.new("KKGFTCGELA")
|
21
21
|
|
22
22
|
#what is the protein charge at ph 14?
|
23
|
-
charge =
|
23
|
+
charge = aa.calculate_charge_at(14)
|
24
24
|
|
25
25
|
#calculate the ph and round off to 7 decimal places
|
26
|
-
ph =
|
26
|
+
ph = aa.calculate_iep('dtaselect', 7)
|
27
|
+
|
28
|
+
# calculate the ph with a custom set
|
29
|
+
custom = { "N_TERMINUS" => 8.1,
|
30
|
+
"K" => 10.1,
|
31
|
+
"R" => 12.1,
|
32
|
+
"H" => 6.4,
|
33
|
+
"C_TERMINUS" => 3.15,
|
34
|
+
"D" => 4.34,
|
35
|
+
"E" => 4.33,
|
36
|
+
"C" => 8.33,
|
37
|
+
"Y" => 9.5
|
38
|
+
}
|
39
|
+
ph = aa.calculate_iep(custom, 7)
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Bio
|
2
|
+
module Sequence
|
3
|
+
class AA < String
|
4
|
+
CHARGED_GROUPS = ['K', 'R', 'H', 'D', 'E', 'C', 'Y']
|
5
|
+
|
6
|
+
def initialize(sequence)
|
7
|
+
raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
|
8
|
+
super(sequence.upcase.gsub(/\s/, ''))
|
9
|
+
end
|
10
|
+
|
11
|
+
# Calculate the Isoelectric Point
|
12
|
+
# pka_name_or_set: the name of a PKA set or a custom PKA set
|
13
|
+
# places: specify the number of decimal places the value should be rounded to.
|
14
|
+
# loop_limit: how many iterations should be made to find the point. You should not need to tweak this.
|
15
|
+
def calculate_iep(pka_name_or_set = 'dtaselect', places = 2, loop_limit = 100)
|
16
|
+
loops = 0
|
17
|
+
ph = 7.5
|
18
|
+
step = 3.5
|
19
|
+
begin
|
20
|
+
current_charge = calculate_charge_at(ph, pka_name_or_set)
|
21
|
+
if current_charge > 0
|
22
|
+
ph += step
|
23
|
+
else
|
24
|
+
ph -= step
|
25
|
+
end
|
26
|
+
step /= 2.0
|
27
|
+
loops += 1
|
28
|
+
raise "Could not find a result within #{loop_limit} loops using #{pka_name_or_set.inspect}" if loops == loop_limit
|
29
|
+
end while not iep_reached?(current_charge)
|
30
|
+
ph.round_to_places(places)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Calculate the charge of the sequence at a given ph
|
34
|
+
# As a second argument you can pass the name of the PKA set or a custom PKA set
|
35
|
+
def calculate_charge_at(ph, pka_name_or_set = 'dtaselect')
|
36
|
+
['K', 'R', 'H'].inject(partial_charge(select_pka(pka_name_or_set)['N_TERMINUS'], ph)) do |memo, item|
|
37
|
+
memo += partial_charge(select_pka(pka_name_or_set)[item], ph) * charged_residue_frequencies[item]
|
38
|
+
end -
|
39
|
+
['D', 'E', 'C', 'Y'].inject(partial_charge(ph, select_pka(pka_name_or_set)['C_TERMINUS'])) do |memo, item|
|
40
|
+
memo += partial_charge(ph, select_pka(pka_name_or_set)[item]) * charged_residue_frequencies[item]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def iep_reached?(current_charge)
|
46
|
+
current_charge =! nil && 0.0.round_to_places(5) == current_charge.round_to_places(5)
|
47
|
+
end
|
48
|
+
|
49
|
+
def charged_residue_frequencies
|
50
|
+
@charged_residue_frequency ||= calculate_charged_residue_frequencies
|
51
|
+
end
|
52
|
+
|
53
|
+
def partial_charge(a, b)
|
54
|
+
x = 10 ** (a - b)
|
55
|
+
x / (x + 1).to_f
|
56
|
+
end
|
57
|
+
|
58
|
+
# Count the occurrences of the charged groups in the AA.
|
59
|
+
# Returns a Hash where the key is the group and the value is the number of
|
60
|
+
# occurrences in self.
|
61
|
+
def calculate_charged_residue_frequencies
|
62
|
+
CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
|
63
|
+
memo[item] = self.count(item)
|
64
|
+
memo
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# Select a PKA set according to the name or supply a custom set.
|
70
|
+
# Raises ArgumentError if the name can not be mapped to a PKA set.
|
71
|
+
# If the argument is a String it is used as a key to lookup the set,
|
72
|
+
# if it's a Hash then it's assumed a custom set has been supplied.
|
73
|
+
def select_pka(pka_name_or_set = 'dtaselect')
|
74
|
+
if pka_name_or_set.is_a?(Hash)
|
75
|
+
pka_name_or_set
|
76
|
+
else
|
77
|
+
set = PkaData::PKAS[pka_name_or_set]
|
78
|
+
raise ArgumentError.new("Set '#{pka_name_or_set}' is unknown. Please specify one of #{PkaData::PKAS.keys.join(', ')} or pass a custom set") unless set
|
79
|
+
set
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Bio
|
2
|
+
module Sequence
|
3
|
+
module PkaData
|
4
|
+
PKAS = {
|
5
|
+
'dtaselect' => { 'N_TERMINUS' => 8.0,
|
6
|
+
'K' => 10.0,
|
7
|
+
'R' => 12.0,
|
8
|
+
'H' => 6.5,
|
9
|
+
'C_TERMINUS' => 3.1,
|
10
|
+
'D' => 4.4,
|
11
|
+
'E' => 4.4,
|
12
|
+
'C' => 8.5,
|
13
|
+
'Y' => 10.0
|
14
|
+
}.freeze,
|
15
|
+
'emboss' => { 'N_TERMINUS' => 8.0,
|
16
|
+
'K' => 10.0,
|
17
|
+
'R' => 12.0,
|
18
|
+
'H' => 6.5,
|
19
|
+
'C_TERMINUS' => 3.1,
|
20
|
+
'D' => 4.4,
|
21
|
+
'E' => 4.4,
|
22
|
+
'C' => 8.5,
|
23
|
+
'Y' => 10.0
|
24
|
+
}.freeze,
|
25
|
+
'rodwell' => { 'N_TERMINUS' => 8.0,
|
26
|
+
'K' => 11.5,
|
27
|
+
'R' => 11.5,
|
28
|
+
'H' => 6.0,
|
29
|
+
'C_TERMINUS' => 3.1,
|
30
|
+
'D' => 3.68,
|
31
|
+
'E' => 4.25,
|
32
|
+
'C' => 8.33,
|
33
|
+
'Y' => 10.07
|
34
|
+
}.freeze,
|
35
|
+
'wikipedia' => { 'N_TERMINUS' => 8.2,
|
36
|
+
'K' => 10.54,
|
37
|
+
'R' => 12.48,
|
38
|
+
'H' => 6.04,
|
39
|
+
'C_TERMINUS' => 3.65,
|
40
|
+
'D' => 3.9,
|
41
|
+
'E' => 4.07,
|
42
|
+
'C' => 8.18,
|
43
|
+
'Y' => 10.47
|
44
|
+
}.freeze,
|
45
|
+
'silerio' => { 'N_TERMINUS' => 8.2,
|
46
|
+
'K' => 10.4,
|
47
|
+
'R' => 12.0,
|
48
|
+
'H' => 6.4,
|
49
|
+
'C_TERMINUS' => 3.2,
|
50
|
+
'D' => 4.0,
|
51
|
+
'E' => 4.5,
|
52
|
+
'C' => 9.0,
|
53
|
+
'Y' => 10.0
|
54
|
+
}.freeze
|
55
|
+
}.freeze
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
data/lib/isoelectric_point.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'test_helper'
|
2
|
-
include
|
3
|
-
class
|
2
|
+
include Bio::Sequence
|
3
|
+
class AATest < Test::Unit::TestCase
|
4
4
|
|
5
5
|
context 'some known sequences' do
|
6
6
|
setup do
|
@@ -23,10 +23,10 @@ class SequenceTest < Test::Unit::TestCase
|
|
23
23
|
MMS' => 8.30908203125
|
24
24
|
}
|
25
25
|
end
|
26
|
-
should 'calculate' do
|
26
|
+
should 'calculate iep' do
|
27
27
|
places = 2
|
28
28
|
@known.each do |sequence, expected|
|
29
|
-
actual =
|
29
|
+
actual = AA.new(sequence).calculate_iep('dtaselect', places)
|
30
30
|
assert_equal expected.round_to_places(places), actual, "Expected the iep to be #{expected} but was #{actual} for #{sequence}"
|
31
31
|
end
|
32
32
|
end
|
@@ -34,33 +34,65 @@ class SequenceTest < Test::Unit::TestCase
|
|
34
34
|
|
35
35
|
should "Raise if not sequence given" do
|
36
36
|
assert_raise ArgumentError do
|
37
|
-
|
37
|
+
AA.new(nil)
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
41
|
should "Raise if empty sequence given" do
|
42
42
|
assert_raise ArgumentError do
|
43
|
-
|
43
|
+
AA.new(' ')
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
47
|
|
48
48
|
should "Raise if unknown pks used" do
|
49
49
|
assert_raise ArgumentError do
|
50
|
-
|
50
|
+
AA.new('PG', 'youdontknowme')
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
context "a Sequence" do
|
55
55
|
setup do
|
56
|
-
@sequence =
|
56
|
+
@sequence = AA.new("PGAKAAAKKPKKAAG")
|
57
57
|
end
|
58
58
|
|
59
59
|
should "calculates the isolectric point to 0 places" do
|
60
|
-
assert_equal 11, @sequence.calculate_iep(0)
|
60
|
+
assert_equal 11, @sequence.calculate_iep('dtaselect', 0)
|
61
61
|
end
|
62
62
|
should "calculates the isolectric pointto 3 places" do
|
63
|
-
assert_equal 10.603, @sequence.calculate_iep(3)
|
63
|
+
assert_equal 10.603, @sequence.calculate_iep('dtaselect', 3)
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'use a custom pka set' do
|
67
|
+
setup do
|
68
|
+
@custom = { "N_TERMINUS" => 8.0,
|
69
|
+
"K" => 9.5, # changed from dta_select where it is 10.0
|
70
|
+
"R" => 12.0,
|
71
|
+
"H" => 6.5,
|
72
|
+
"C_TERMINUS" => 3.1,
|
73
|
+
"D" => 4.4,
|
74
|
+
"E" => 4.4,
|
75
|
+
"C" => 8.5,
|
76
|
+
"Y" => 10.1
|
77
|
+
}
|
78
|
+
end
|
79
|
+
should 'accept a custom pka set and use it for calculation' do
|
80
|
+
assert_equal 10.106, @sequence.calculate_iep(@custom, 3)
|
81
|
+
end
|
82
|
+
|
83
|
+
should 'raise when no result can be found due to a invalid set' do
|
84
|
+
@custom['K'] = 20
|
85
|
+
assert_raises RuntimeError do
|
86
|
+
@sequence.calculate_iep(@custom, 3)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
context 'use another pka set' do
|
91
|
+
should 'work with all provided sets without raising' do
|
92
|
+
Bio::Sequence::PkaData::PKAS.keys.each do |key|
|
93
|
+
@sequence.calculate_iep(key, 3, 25)
|
94
|
+
end
|
95
|
+
end
|
64
96
|
end
|
65
97
|
end
|
66
98
|
end
|
metadata
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isoelectric_point
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
|
-
-
|
13
|
+
- GeorgeG
|
14
14
|
- pascalbetz
|
15
15
|
autorequire:
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-10-
|
19
|
+
date: 2010-10-07 00:00:00 +02:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -60,14 +60,14 @@ files:
|
|
60
60
|
- LICENSE
|
61
61
|
- README.rdoc
|
62
62
|
- lib/isoelectric_point.rb
|
63
|
-
- lib/isoelectric_point/
|
63
|
+
- lib/isoelectric_point/pka_data.rb
|
64
64
|
- lib/isoelectric_point/extensions.rb
|
65
|
-
- lib/isoelectric_point/
|
66
|
-
- test/
|
65
|
+
- lib/isoelectric_point/aa.rb
|
66
|
+
- test/aa_test.rb
|
67
67
|
- test/extensions_test.rb
|
68
68
|
- test/test_helper.rb
|
69
69
|
has_rdoc: true
|
70
|
-
homepage: http://github.com/
|
70
|
+
homepage: http://github.com/GeorgeG/isoelectric_point_4_R
|
71
71
|
licenses: []
|
72
72
|
|
73
73
|
post_install_message:
|
@@ -99,8 +99,8 @@ rubyforge_project:
|
|
99
99
|
rubygems_version: 1.3.7
|
100
100
|
signing_key:
|
101
101
|
specification_version: 3
|
102
|
-
summary: Calculate isoelectric point.
|
102
|
+
summary: Calculate isoelectric point.
|
103
103
|
test_files:
|
104
|
-
- test/
|
104
|
+
- test/aa_test.rb
|
105
105
|
- test/extensions_test.rb
|
106
106
|
- test/test_helper.rb
|
@@ -1,56 +0,0 @@
|
|
1
|
-
module IsoelectricPoint
|
2
|
-
module Data
|
3
|
-
PKAS = {
|
4
|
-
'dtaselect' => { "N_TERMINUS" => 8.0,
|
5
|
-
"K" => 10.0,
|
6
|
-
"R" => 12.0,
|
7
|
-
"H" => 6.5,
|
8
|
-
"C_TERMINUS" => 3.1,
|
9
|
-
"D" => 4.4,
|
10
|
-
"E" => 4.4,
|
11
|
-
"C" => 8.5,
|
12
|
-
"Y" => 10.0
|
13
|
-
}.freeze,
|
14
|
-
'emboss' => { "N_TERMINUS" => 8.0,
|
15
|
-
"K" => 10.0,
|
16
|
-
"R" => 12.0,
|
17
|
-
"H" => 6.5,
|
18
|
-
"C_TERMINUS" => 3.1,
|
19
|
-
"D" => 4.4,
|
20
|
-
"E" => 4.4,
|
21
|
-
"C" => 8.5,
|
22
|
-
"Y" => 10.0
|
23
|
-
}.freeze,
|
24
|
-
'rodwell' => { "N_TERMINUS" => 8.0,
|
25
|
-
"K" => 11.5,
|
26
|
-
"R" => 11.5,
|
27
|
-
"H" => 6.0,
|
28
|
-
"C_TERMINUS" => 3.1,
|
29
|
-
"D" => 3.68,
|
30
|
-
"E" => 4.25,
|
31
|
-
"C" => 8.33,
|
32
|
-
"Y" => 10.07
|
33
|
-
}.freeze,
|
34
|
-
'wikipedia' => { "N_TERMINUS" => 8.2,
|
35
|
-
"K" => 10.54,
|
36
|
-
"R" => 12.48,
|
37
|
-
"H" => 6.04,
|
38
|
-
"C_TERMINUS" => 3.65,
|
39
|
-
"D" => 3.9,
|
40
|
-
"E" => 4.07,
|
41
|
-
"C" => 8.18,
|
42
|
-
"Y" => 10.47
|
43
|
-
}.freeze,
|
44
|
-
'silerio' => { "N_TERMINUS" => 8.2,
|
45
|
-
"K" => 10.4,
|
46
|
-
"R" => 12.0,
|
47
|
-
"H" => 6.4,
|
48
|
-
"C_TERMINUS" => 3.2,
|
49
|
-
"D" => 4.0,
|
50
|
-
"E" => 4.5,
|
51
|
-
"C" => 9.0,
|
52
|
-
"Y" => 10.0
|
53
|
-
}.freeze
|
54
|
-
}.freeze
|
55
|
-
end
|
56
|
-
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
#calculates the isoelectric point of a given protein sequence
|
2
|
-
module IsoelectricPoint
|
3
|
-
class Sequence
|
4
|
-
CHARGED_GROUPS = %w{K R H D E C Y}
|
5
|
-
KEYS_PLUS = ['K', 'R', 'H']
|
6
|
-
KEYS_MINUS = ['D', 'E', 'C', 'Y']
|
7
|
-
|
8
|
-
attr_accessor :value
|
9
|
-
attr_reader :pks
|
10
|
-
|
11
|
-
def initialize(sequence, pka_set_name = 'dtaselect')
|
12
|
-
raise ArgumentError.new("pka_set_name is required") if pka_set_name.nil? || pka_set_name.strip == ''
|
13
|
-
raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
|
14
|
-
@pks = Data::PKAS[pka_set_name]
|
15
|
-
@value = sequence.upcase.gsub(/\s/, '')
|
16
|
-
raise ArgumentError.new("pka_set '#{pka_set_name}' is unknown. Please specify one of #{Data::PKAS.keys.join(', ')}") unless self.pks
|
17
|
-
end
|
18
|
-
|
19
|
-
def calculate_iep(places = 2)
|
20
|
-
precission = 15
|
21
|
-
ph = 7.5
|
22
|
-
step = 3.5
|
23
|
-
target_charge = 0.0
|
24
|
-
begin
|
25
|
-
current_charge = calculate_charge_at(ph)
|
26
|
-
if current_charge > 0
|
27
|
-
ph += step
|
28
|
-
else
|
29
|
-
ph -= step
|
30
|
-
end
|
31
|
-
step /= 2.0
|
32
|
-
#puts "#{self.value}: %.10f / %.10f / %.10f : direction was #{current_charge > 0 ? '+' : '-'}" % [current_charge, step, ph]
|
33
|
-
#sleep 0.1
|
34
|
-
end while current_charge == nil || target_charge.round_to_places(precission) != current_charge.round_to_places(precission)
|
35
|
-
ph.round_to_places(places)
|
36
|
-
end
|
37
|
-
|
38
|
-
|
39
|
-
def calculate_charge_at(ph)
|
40
|
-
KEYS_PLUS.inject(partial_charge(pks['N_TERMINUS'], ph)) do |memo, item|
|
41
|
-
memo += partial_charge(self.pks[item], ph) * charged_residue_frequencies[item]
|
42
|
-
end -
|
43
|
-
KEYS_MINUS.inject(partial_charge(ph, pks['C_TERMINUS'])) do |memo, item|
|
44
|
-
memo += partial_charge(ph, self.pks[item]) * charged_residue_frequencies[item]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
private
|
49
|
-
|
50
|
-
def charged_residue_frequencies
|
51
|
-
@charged_residue_frequency ||= calculate_charged_residue_frequencies
|
52
|
-
end
|
53
|
-
|
54
|
-
def partial_charge(a, b)
|
55
|
-
x = 10 ** (a - b)
|
56
|
-
x / (x + 1).to_f
|
57
|
-
end
|
58
|
-
|
59
|
-
def calculate_charged_residue_frequencies
|
60
|
-
CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
|
61
|
-
memo[item] = self.value.count(item)
|
62
|
-
memo
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
end
|