isoelectric_point 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,26 +1,39 @@
1
- == isoelectric_point
2
- A ruby class for calculating the isoelectric point of a protein. It's based on bioperl and the library from GeorgeG.
1
+ = isoelectric_point
2
+ A ruby class for calculating the isoelectric point of a protein. It's based on bioperl.
3
+ It's currently being refactored to fit into bioruby.
3
4
 
4
- = PKA Sets
5
+ == PKA Sets
5
6
  The available pka sets are:
6
- * dta_select_pkas
7
- * emboss_pkas
8
- * rodwell_pkas
9
- * wikipedia_pkas
10
- * sillero_pkas
7
+ * dtaselect
8
+ * emboss
9
+ * rodwell
10
+ * wikipedia
11
+ * silerio
11
12
 
12
- = Installation
13
+ == Installation
13
14
  gem install isoelectric_point
14
15
 
15
- = Usage
16
+ == Usage
16
17
  require 'isoelectric_point'
17
18
  # Either prefix or include namespace
18
- include IsoelectricPoint
19
- #initialize a pka clculator object with dtaselect pkas and the KKGFTCGELA as an amino acid sequence
20
- sequence = Sequence.new("KKGFTCGELA")
19
+ include Bio::Sequence
20
+ aa = AA.new("KKGFTCGELA")
21
21
 
22
22
  #what is the protein charge at ph 14?
23
- charge = sequence.calculate_charge_at(14)
23
+ charge = aa.calculate_charge_at(14)
24
24
 
25
25
  #calculate the ph and round off to 7 decimal places
26
- ph = sequence.calculate_iep(7)
26
+ ph = aa.calculate_iep('dtaselect', 7)
27
+
28
+ # calculate the ph with a custom set
29
+ custom = { "N_TERMINUS" => 8.1,
30
+ "K" => 10.1,
31
+ "R" => 12.1,
32
+ "H" => 6.4,
33
+ "C_TERMINUS" => 3.15,
34
+ "D" => 4.34,
35
+ "E" => 4.33,
36
+ "C" => 8.33,
37
+ "Y" => 9.5
38
+ }
39
+ ph = aa.calculate_iep(custom, 7)
@@ -0,0 +1,84 @@
1
+ module Bio
2
+ module Sequence
3
+ class AA < String
4
+ CHARGED_GROUPS = ['K', 'R', 'H', 'D', 'E', 'C', 'Y']
5
+
6
+ def initialize(sequence)
7
+ raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
8
+ super(sequence.upcase.gsub(/\s/, ''))
9
+ end
10
+
11
+ # Calculate the Isoelectric Point
12
+ # pka_name_or_set: the name of a PKA set or a custom PKA set
13
+ # places: specify the number of decimal places the value should be rounded to.
14
+ # loop_limit: how many iterations should be made to find the point. You should not need to tweak this.
15
+ def calculate_iep(pka_name_or_set = 'dtaselect', places = 2, loop_limit = 100)
16
+ loops = 0
17
+ ph = 7.5
18
+ step = 3.5
19
+ begin
20
+ current_charge = calculate_charge_at(ph, pka_name_or_set)
21
+ if current_charge > 0
22
+ ph += step
23
+ else
24
+ ph -= step
25
+ end
26
+ step /= 2.0
27
+ loops += 1
28
+ raise "Could not find a result within #{loop_limit} loops using #{pka_name_or_set.inspect}" if loops == loop_limit
29
+ end while not iep_reached?(current_charge)
30
+ ph.round_to_places(places)
31
+ end
32
+
33
+ # Calculate the charge of the sequence at a given ph
34
+ # As a second argument you can pass the name of the PKA set or a custom PKA set
35
+ def calculate_charge_at(ph, pka_name_or_set = 'dtaselect')
36
+ ['K', 'R', 'H'].inject(partial_charge(select_pka(pka_name_or_set)['N_TERMINUS'], ph)) do |memo, item|
37
+ memo += partial_charge(select_pka(pka_name_or_set)[item], ph) * charged_residue_frequencies[item]
38
+ end -
39
+ ['D', 'E', 'C', 'Y'].inject(partial_charge(ph, select_pka(pka_name_or_set)['C_TERMINUS'])) do |memo, item|
40
+ memo += partial_charge(ph, select_pka(pka_name_or_set)[item]) * charged_residue_frequencies[item]
41
+ end
42
+ end
43
+
44
+ private
45
+ def iep_reached?(current_charge)
46
+ current_charge =! nil && 0.0.round_to_places(5) == current_charge.round_to_places(5)
47
+ end
48
+
49
+ def charged_residue_frequencies
50
+ @charged_residue_frequency ||= calculate_charged_residue_frequencies
51
+ end
52
+
53
+ def partial_charge(a, b)
54
+ x = 10 ** (a - b)
55
+ x / (x + 1).to_f
56
+ end
57
+
58
+ # Count the occurrences of the charged groups in the AA.
59
+ # Returns a Hash where the key is the group and the value is the number of
60
+ # occurrences in self.
61
+ def calculate_charged_residue_frequencies
62
+ CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
63
+ memo[item] = self.count(item)
64
+ memo
65
+ end
66
+ end
67
+
68
+ #
69
+ # Select a PKA set according to the name or supply a custom set.
70
+ # Raises ArgumentError if the name can not be mapped to a PKA set.
71
+ # If the argument is a String it is used as a key to lookup the set,
72
+ # if it's a Hash then it's assumed a custom set has been supplied.
73
+ def select_pka(pka_name_or_set = 'dtaselect')
74
+ if pka_name_or_set.is_a?(Hash)
75
+ pka_name_or_set
76
+ else
77
+ set = PkaData::PKAS[pka_name_or_set]
78
+ raise ArgumentError.new("Set '#{pka_name_or_set}' is unknown. Please specify one of #{PkaData::PKAS.keys.join(', ')} or pass a custom set") unless set
79
+ set
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,58 @@
1
+ module Bio
2
+ module Sequence
3
+ module PkaData
4
+ PKAS = {
5
+ 'dtaselect' => { 'N_TERMINUS' => 8.0,
6
+ 'K' => 10.0,
7
+ 'R' => 12.0,
8
+ 'H' => 6.5,
9
+ 'C_TERMINUS' => 3.1,
10
+ 'D' => 4.4,
11
+ 'E' => 4.4,
12
+ 'C' => 8.5,
13
+ 'Y' => 10.0
14
+ }.freeze,
15
+ 'emboss' => { 'N_TERMINUS' => 8.0,
16
+ 'K' => 10.0,
17
+ 'R' => 12.0,
18
+ 'H' => 6.5,
19
+ 'C_TERMINUS' => 3.1,
20
+ 'D' => 4.4,
21
+ 'E' => 4.4,
22
+ 'C' => 8.5,
23
+ 'Y' => 10.0
24
+ }.freeze,
25
+ 'rodwell' => { 'N_TERMINUS' => 8.0,
26
+ 'K' => 11.5,
27
+ 'R' => 11.5,
28
+ 'H' => 6.0,
29
+ 'C_TERMINUS' => 3.1,
30
+ 'D' => 3.68,
31
+ 'E' => 4.25,
32
+ 'C' => 8.33,
33
+ 'Y' => 10.07
34
+ }.freeze,
35
+ 'wikipedia' => { 'N_TERMINUS' => 8.2,
36
+ 'K' => 10.54,
37
+ 'R' => 12.48,
38
+ 'H' => 6.04,
39
+ 'C_TERMINUS' => 3.65,
40
+ 'D' => 3.9,
41
+ 'E' => 4.07,
42
+ 'C' => 8.18,
43
+ 'Y' => 10.47
44
+ }.freeze,
45
+ 'silerio' => { 'N_TERMINUS' => 8.2,
46
+ 'K' => 10.4,
47
+ 'R' => 12.0,
48
+ 'H' => 6.4,
49
+ 'C_TERMINUS' => 3.2,
50
+ 'D' => 4.0,
51
+ 'E' => 4.5,
52
+ 'C' => 9.0,
53
+ 'Y' => 10.0
54
+ }.freeze
55
+ }.freeze
56
+ end
57
+ end
58
+ end
@@ -1,3 +1,3 @@
1
- ['data', 'extensions', 'sequence'].each do |name|
1
+ ['pka_data', 'extensions', 'aa'].each do |name|
2
2
  require File.join(File.dirname(__FILE__), 'isoelectric_point', name)
3
3
  end
@@ -1,6 +1,6 @@
1
1
  require 'test_helper'
2
- include IsoelectricPoint
3
- class SequenceTest < Test::Unit::TestCase
2
+ include Bio::Sequence
3
+ class AATest < Test::Unit::TestCase
4
4
 
5
5
  context 'some known sequences' do
6
6
  setup do
@@ -23,10 +23,10 @@ class SequenceTest < Test::Unit::TestCase
23
23
  MMS' => 8.30908203125
24
24
  }
25
25
  end
26
- should 'calculate' do
26
+ should 'calculate iep' do
27
27
  places = 2
28
28
  @known.each do |sequence, expected|
29
- actual = Sequence.new(sequence).calculate_iep(places)
29
+ actual = AA.new(sequence).calculate_iep('dtaselect', places)
30
30
  assert_equal expected.round_to_places(places), actual, "Expected the iep to be #{expected} but was #{actual} for #{sequence}"
31
31
  end
32
32
  end
@@ -34,33 +34,65 @@ class SequenceTest < Test::Unit::TestCase
34
34
 
35
35
  should "Raise if not sequence given" do
36
36
  assert_raise ArgumentError do
37
- Sequence.new(nil)
37
+ AA.new(nil)
38
38
  end
39
39
  end
40
40
 
41
41
  should "Raise if empty sequence given" do
42
42
  assert_raise ArgumentError do
43
- Sequence.new(' ')
43
+ AA.new(' ')
44
44
  end
45
45
  end
46
46
 
47
47
 
48
48
  should "Raise if unknown pks used" do
49
49
  assert_raise ArgumentError do
50
- Sequence.new('PG', 'youdontknowme')
50
+ AA.new('PG', 'youdontknowme')
51
51
  end
52
52
  end
53
53
 
54
54
  context "a Sequence" do
55
55
  setup do
56
- @sequence = Sequence.new("PGAKAAAKKPKKAAG")
56
+ @sequence = AA.new("PGAKAAAKKPKKAAG")
57
57
  end
58
58
 
59
59
  should "calculates the isolectric point to 0 places" do
60
- assert_equal 11, @sequence.calculate_iep(0)
60
+ assert_equal 11, @sequence.calculate_iep('dtaselect', 0)
61
61
  end
62
62
  should "calculates the isolectric pointto 3 places" do
63
- assert_equal 10.603, @sequence.calculate_iep(3)
63
+ assert_equal 10.603, @sequence.calculate_iep('dtaselect', 3)
64
+ end
65
+
66
+ context 'use a custom pka set' do
67
+ setup do
68
+ @custom = { "N_TERMINUS" => 8.0,
69
+ "K" => 9.5, # changed from dta_select where it is 10.0
70
+ "R" => 12.0,
71
+ "H" => 6.5,
72
+ "C_TERMINUS" => 3.1,
73
+ "D" => 4.4,
74
+ "E" => 4.4,
75
+ "C" => 8.5,
76
+ "Y" => 10.1
77
+ }
78
+ end
79
+ should 'accept a custom pka set and use it for calculation' do
80
+ assert_equal 10.106, @sequence.calculate_iep(@custom, 3)
81
+ end
82
+
83
+ should 'raise when no result can be found due to a invalid set' do
84
+ @custom['K'] = 20
85
+ assert_raises RuntimeError do
86
+ @sequence.calculate_iep(@custom, 3)
87
+ end
88
+ end
89
+ end
90
+ context 'use another pka set' do
91
+ should 'work with all provided sets without raising' do
92
+ Bio::Sequence::PkaData::PKAS.keys.each do |key|
93
+ @sequence.calculate_iep(key, 3, 25)
94
+ end
95
+ end
64
96
  end
65
97
  end
66
98
  end
metadata CHANGED
@@ -1,22 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isoelectric_point
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
- - GeorgeR
13
+ - GeorgeG
14
14
  - pascalbetz
15
15
  autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-05 00:00:00 +02:00
19
+ date: 2010-10-07 00:00:00 +02:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -60,14 +60,14 @@ files:
60
60
  - LICENSE
61
61
  - README.rdoc
62
62
  - lib/isoelectric_point.rb
63
- - lib/isoelectric_point/data.rb
63
+ - lib/isoelectric_point/pka_data.rb
64
64
  - lib/isoelectric_point/extensions.rb
65
- - lib/isoelectric_point/sequence.rb
66
- - test/sequence_test.rb
65
+ - lib/isoelectric_point/aa.rb
66
+ - test/aa_test.rb
67
67
  - test/extensions_test.rb
68
68
  - test/test_helper.rb
69
69
  has_rdoc: true
70
- homepage: http://github.com/simplificator/isoelectric_point
70
+ homepage: http://github.com/GeorgeG/isoelectric_point_4_R
71
71
  licenses: []
72
72
 
73
73
  post_install_message:
@@ -99,8 +99,8 @@ rubyforge_project:
99
99
  rubygems_version: 1.3.7
100
100
  signing_key:
101
101
  specification_version: 3
102
- summary: Calculate isoelectric point. Based on code frmo GeorgeR. We just took it and made a gem of it.
102
+ summary: Calculate isoelectric point.
103
103
  test_files:
104
- - test/sequence_test.rb
104
+ - test/aa_test.rb
105
105
  - test/extensions_test.rb
106
106
  - test/test_helper.rb
@@ -1,56 +0,0 @@
1
- module IsoelectricPoint
2
- module Data
3
- PKAS = {
4
- 'dtaselect' => { "N_TERMINUS" => 8.0,
5
- "K" => 10.0,
6
- "R" => 12.0,
7
- "H" => 6.5,
8
- "C_TERMINUS" => 3.1,
9
- "D" => 4.4,
10
- "E" => 4.4,
11
- "C" => 8.5,
12
- "Y" => 10.0
13
- }.freeze,
14
- 'emboss' => { "N_TERMINUS" => 8.0,
15
- "K" => 10.0,
16
- "R" => 12.0,
17
- "H" => 6.5,
18
- "C_TERMINUS" => 3.1,
19
- "D" => 4.4,
20
- "E" => 4.4,
21
- "C" => 8.5,
22
- "Y" => 10.0
23
- }.freeze,
24
- 'rodwell' => { "N_TERMINUS" => 8.0,
25
- "K" => 11.5,
26
- "R" => 11.5,
27
- "H" => 6.0,
28
- "C_TERMINUS" => 3.1,
29
- "D" => 3.68,
30
- "E" => 4.25,
31
- "C" => 8.33,
32
- "Y" => 10.07
33
- }.freeze,
34
- 'wikipedia' => { "N_TERMINUS" => 8.2,
35
- "K" => 10.54,
36
- "R" => 12.48,
37
- "H" => 6.04,
38
- "C_TERMINUS" => 3.65,
39
- "D" => 3.9,
40
- "E" => 4.07,
41
- "C" => 8.18,
42
- "Y" => 10.47
43
- }.freeze,
44
- 'silerio' => { "N_TERMINUS" => 8.2,
45
- "K" => 10.4,
46
- "R" => 12.0,
47
- "H" => 6.4,
48
- "C_TERMINUS" => 3.2,
49
- "D" => 4.0,
50
- "E" => 4.5,
51
- "C" => 9.0,
52
- "Y" => 10.0
53
- }.freeze
54
- }.freeze
55
- end
56
- end
@@ -1,67 +0,0 @@
1
- #calculates the isoelectric point of a given protein sequence
2
- module IsoelectricPoint
3
- class Sequence
4
- CHARGED_GROUPS = %w{K R H D E C Y}
5
- KEYS_PLUS = ['K', 'R', 'H']
6
- KEYS_MINUS = ['D', 'E', 'C', 'Y']
7
-
8
- attr_accessor :value
9
- attr_reader :pks
10
-
11
- def initialize(sequence, pka_set_name = 'dtaselect')
12
- raise ArgumentError.new("pka_set_name is required") if pka_set_name.nil? || pka_set_name.strip == ''
13
- raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
14
- @pks = Data::PKAS[pka_set_name]
15
- @value = sequence.upcase.gsub(/\s/, '')
16
- raise ArgumentError.new("pka_set '#{pka_set_name}' is unknown. Please specify one of #{Data::PKAS.keys.join(', ')}") unless self.pks
17
- end
18
-
19
- def calculate_iep(places = 2)
20
- precission = 15
21
- ph = 7.5
22
- step = 3.5
23
- target_charge = 0.0
24
- begin
25
- current_charge = calculate_charge_at(ph)
26
- if current_charge > 0
27
- ph += step
28
- else
29
- ph -= step
30
- end
31
- step /= 2.0
32
- #puts "#{self.value}: %.10f / %.10f / %.10f : direction was #{current_charge > 0 ? '+' : '-'}" % [current_charge, step, ph]
33
- #sleep 0.1
34
- end while current_charge == nil || target_charge.round_to_places(precission) != current_charge.round_to_places(precission)
35
- ph.round_to_places(places)
36
- end
37
-
38
-
39
- def calculate_charge_at(ph)
40
- KEYS_PLUS.inject(partial_charge(pks['N_TERMINUS'], ph)) do |memo, item|
41
- memo += partial_charge(self.pks[item], ph) * charged_residue_frequencies[item]
42
- end -
43
- KEYS_MINUS.inject(partial_charge(ph, pks['C_TERMINUS'])) do |memo, item|
44
- memo += partial_charge(ph, self.pks[item]) * charged_residue_frequencies[item]
45
- end
46
- end
47
-
48
- private
49
-
50
- def charged_residue_frequencies
51
- @charged_residue_frequency ||= calculate_charged_residue_frequencies
52
- end
53
-
54
- def partial_charge(a, b)
55
- x = 10 ** (a - b)
56
- x / (x + 1).to_f
57
- end
58
-
59
- def calculate_charged_residue_frequencies
60
- CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
61
- memo[item] = self.value.count(item)
62
- memo
63
- end
64
- end
65
-
66
- end
67
- end