isoelectric_point 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,26 +1,39 @@
1
- == isoelectric_point
2
- A ruby class for calculating the isoelectric point of a protein. It's based on bioperl and the library from GeorgeG.
1
+ = isoelectric_point
2
+ A ruby class for calculating the isoelectric point of a protein. It's based on bioperl.
3
+ It's currently being refactored to fit into bioruby.
3
4
 
4
- = PKA Sets
5
+ == PKA Sets
5
6
  The available pka sets are:
6
- * dta_select_pkas
7
- * emboss_pkas
8
- * rodwell_pkas
9
- * wikipedia_pkas
10
- * sillero_pkas
7
+ * dtaselect
8
+ * emboss
9
+ * rodwell
10
+ * wikipedia
11
+ * silerio
11
12
 
12
- = Installation
13
+ == Installation
13
14
  gem install isoelectric_point
14
15
 
15
- = Usage
16
+ == Usage
16
17
  require 'isoelectric_point'
17
18
  # Either prefix or include namespace
18
- include IsoelectricPoint
19
- #initialize a pka clculator object with dtaselect pkas and the KKGFTCGELA as an amino acid sequence
20
- sequence = Sequence.new("KKGFTCGELA")
19
+ include Bio::Sequence
20
+ aa = AA.new("KKGFTCGELA")
21
21
 
22
22
  #what is the protein charge at ph 14?
23
- charge = sequence.calculate_charge_at(14)
23
+ charge = aa.calculate_charge_at(14)
24
24
 
25
25
  #calculate the ph and round off to 7 decimal places
26
- ph = sequence.calculate_iep(7)
26
+ ph = aa.calculate_iep('dtaselect', 7)
27
+
28
+ # calculate the ph with a custom set
29
+ custom = { "N_TERMINUS" => 8.1,
30
+ "K" => 10.1,
31
+ "R" => 12.1,
32
+ "H" => 6.4,
33
+ "C_TERMINUS" => 3.15,
34
+ "D" => 4.34,
35
+ "E" => 4.33,
36
+ "C" => 8.33,
37
+ "Y" => 9.5
38
+ }
39
+ ph = aa.calculate_iep(custom, 7)
@@ -0,0 +1,84 @@
1
+ module Bio
2
+ module Sequence
3
+ class AA < String
4
+ CHARGED_GROUPS = ['K', 'R', 'H', 'D', 'E', 'C', 'Y']
5
+
6
+ def initialize(sequence)
7
+ raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
8
+ super(sequence.upcase.gsub(/\s/, ''))
9
+ end
10
+
11
+ # Calculate the Isoelectric Point
12
+ # pka_name_or_set: the name of a PKA set or a custom PKA set
13
+ # places: specify the number of decimal places the value should be rounded to.
14
+ # loop_limit: how many iterations should be made to find the point. You should not need to tweak this.
15
+ def calculate_iep(pka_name_or_set = 'dtaselect', places = 2, loop_limit = 100)
16
+ loops = 0
17
+ ph = 7.5
18
+ step = 3.5
19
+ begin
20
+ current_charge = calculate_charge_at(ph, pka_name_or_set)
21
+ if current_charge > 0
22
+ ph += step
23
+ else
24
+ ph -= step
25
+ end
26
+ step /= 2.0
27
+ loops += 1
28
+ raise "Could not find a result within #{loop_limit} loops using #{pka_name_or_set.inspect}" if loops == loop_limit
29
+ end while not iep_reached?(current_charge)
30
+ ph.round_to_places(places)
31
+ end
32
+
33
+ # Calculate the charge of the sequence at a given ph
34
+ # As a second argument you can pass the name of the PKA set or a custom PKA set
35
+ def calculate_charge_at(ph, pka_name_or_set = 'dtaselect')
36
+ ['K', 'R', 'H'].inject(partial_charge(select_pka(pka_name_or_set)['N_TERMINUS'], ph)) do |memo, item|
37
+ memo += partial_charge(select_pka(pka_name_or_set)[item], ph) * charged_residue_frequencies[item]
38
+ end -
39
+ ['D', 'E', 'C', 'Y'].inject(partial_charge(ph, select_pka(pka_name_or_set)['C_TERMINUS'])) do |memo, item|
40
+ memo += partial_charge(ph, select_pka(pka_name_or_set)[item]) * charged_residue_frequencies[item]
41
+ end
42
+ end
43
+
44
+ private
45
+ def iep_reached?(current_charge)
46
+ current_charge =! nil && 0.0.round_to_places(5) == current_charge.round_to_places(5)
47
+ end
48
+
49
+ def charged_residue_frequencies
50
+ @charged_residue_frequency ||= calculate_charged_residue_frequencies
51
+ end
52
+
53
+ def partial_charge(a, b)
54
+ x = 10 ** (a - b)
55
+ x / (x + 1).to_f
56
+ end
57
+
58
+ # Count the occurrences of the charged groups in the AA.
59
+ # Returns a Hash where the key is the group and the value is the number of
60
+ # occurrences in self.
61
+ def calculate_charged_residue_frequencies
62
+ CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
63
+ memo[item] = self.count(item)
64
+ memo
65
+ end
66
+ end
67
+
68
+ #
69
+ # Select a PKA set according to the name or supply a custom set.
70
+ # Raises ArgumentError if the name can not be mapped to a PKA set.
71
+ # If the argument is a String it is used as a key to lookup the set,
72
+ # if it's a Hash then it's assumed a custom set has been supplied.
73
+ def select_pka(pka_name_or_set = 'dtaselect')
74
+ if pka_name_or_set.is_a?(Hash)
75
+ pka_name_or_set
76
+ else
77
+ set = PkaData::PKAS[pka_name_or_set]
78
+ raise ArgumentError.new("Set '#{pka_name_or_set}' is unknown. Please specify one of #{PkaData::PKAS.keys.join(', ')} or pass a custom set") unless set
79
+ set
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,58 @@
1
+ module Bio
2
+ module Sequence
3
+ module PkaData
4
+ PKAS = {
5
+ 'dtaselect' => { 'N_TERMINUS' => 8.0,
6
+ 'K' => 10.0,
7
+ 'R' => 12.0,
8
+ 'H' => 6.5,
9
+ 'C_TERMINUS' => 3.1,
10
+ 'D' => 4.4,
11
+ 'E' => 4.4,
12
+ 'C' => 8.5,
13
+ 'Y' => 10.0
14
+ }.freeze,
15
+ 'emboss' => { 'N_TERMINUS' => 8.0,
16
+ 'K' => 10.0,
17
+ 'R' => 12.0,
18
+ 'H' => 6.5,
19
+ 'C_TERMINUS' => 3.1,
20
+ 'D' => 4.4,
21
+ 'E' => 4.4,
22
+ 'C' => 8.5,
23
+ 'Y' => 10.0
24
+ }.freeze,
25
+ 'rodwell' => { 'N_TERMINUS' => 8.0,
26
+ 'K' => 11.5,
27
+ 'R' => 11.5,
28
+ 'H' => 6.0,
29
+ 'C_TERMINUS' => 3.1,
30
+ 'D' => 3.68,
31
+ 'E' => 4.25,
32
+ 'C' => 8.33,
33
+ 'Y' => 10.07
34
+ }.freeze,
35
+ 'wikipedia' => { 'N_TERMINUS' => 8.2,
36
+ 'K' => 10.54,
37
+ 'R' => 12.48,
38
+ 'H' => 6.04,
39
+ 'C_TERMINUS' => 3.65,
40
+ 'D' => 3.9,
41
+ 'E' => 4.07,
42
+ 'C' => 8.18,
43
+ 'Y' => 10.47
44
+ }.freeze,
45
+ 'silerio' => { 'N_TERMINUS' => 8.2,
46
+ 'K' => 10.4,
47
+ 'R' => 12.0,
48
+ 'H' => 6.4,
49
+ 'C_TERMINUS' => 3.2,
50
+ 'D' => 4.0,
51
+ 'E' => 4.5,
52
+ 'C' => 9.0,
53
+ 'Y' => 10.0
54
+ }.freeze
55
+ }.freeze
56
+ end
57
+ end
58
+ end
@@ -1,3 +1,3 @@
1
- ['data', 'extensions', 'sequence'].each do |name|
1
+ ['pka_data', 'extensions', 'aa'].each do |name|
2
2
  require File.join(File.dirname(__FILE__), 'isoelectric_point', name)
3
3
  end
@@ -1,6 +1,6 @@
1
1
  require 'test_helper'
2
- include IsoelectricPoint
3
- class SequenceTest < Test::Unit::TestCase
2
+ include Bio::Sequence
3
+ class AATest < Test::Unit::TestCase
4
4
 
5
5
  context 'some known sequences' do
6
6
  setup do
@@ -23,10 +23,10 @@ class SequenceTest < Test::Unit::TestCase
23
23
  MMS' => 8.30908203125
24
24
  }
25
25
  end
26
- should 'calculate' do
26
+ should 'calculate iep' do
27
27
  places = 2
28
28
  @known.each do |sequence, expected|
29
- actual = Sequence.new(sequence).calculate_iep(places)
29
+ actual = AA.new(sequence).calculate_iep('dtaselect', places)
30
30
  assert_equal expected.round_to_places(places), actual, "Expected the iep to be #{expected} but was #{actual} for #{sequence}"
31
31
  end
32
32
  end
@@ -34,33 +34,65 @@ class SequenceTest < Test::Unit::TestCase
34
34
 
35
35
  should "Raise if not sequence given" do
36
36
  assert_raise ArgumentError do
37
- Sequence.new(nil)
37
+ AA.new(nil)
38
38
  end
39
39
  end
40
40
 
41
41
  should "Raise if empty sequence given" do
42
42
  assert_raise ArgumentError do
43
- Sequence.new(' ')
43
+ AA.new(' ')
44
44
  end
45
45
  end
46
46
 
47
47
 
48
48
  should "Raise if unknown pks used" do
49
49
  assert_raise ArgumentError do
50
- Sequence.new('PG', 'youdontknowme')
50
+ AA.new('PG', 'youdontknowme')
51
51
  end
52
52
  end
53
53
 
54
54
  context "a Sequence" do
55
55
  setup do
56
- @sequence = Sequence.new("PGAKAAAKKPKKAAG")
56
+ @sequence = AA.new("PGAKAAAKKPKKAAG")
57
57
  end
58
58
 
59
59
  should "calculates the isolectric point to 0 places" do
60
- assert_equal 11, @sequence.calculate_iep(0)
60
+ assert_equal 11, @sequence.calculate_iep('dtaselect', 0)
61
61
  end
62
62
  should "calculates the isolectric pointto 3 places" do
63
- assert_equal 10.603, @sequence.calculate_iep(3)
63
+ assert_equal 10.603, @sequence.calculate_iep('dtaselect', 3)
64
+ end
65
+
66
+ context 'use a custom pka set' do
67
+ setup do
68
+ @custom = { "N_TERMINUS" => 8.0,
69
+ "K" => 9.5, # changed from dta_select where it is 10.0
70
+ "R" => 12.0,
71
+ "H" => 6.5,
72
+ "C_TERMINUS" => 3.1,
73
+ "D" => 4.4,
74
+ "E" => 4.4,
75
+ "C" => 8.5,
76
+ "Y" => 10.1
77
+ }
78
+ end
79
+ should 'accept a custom pka set and use it for calculation' do
80
+ assert_equal 10.106, @sequence.calculate_iep(@custom, 3)
81
+ end
82
+
83
+ should 'raise when no result can be found due to a invalid set' do
84
+ @custom['K'] = 20
85
+ assert_raises RuntimeError do
86
+ @sequence.calculate_iep(@custom, 3)
87
+ end
88
+ end
89
+ end
90
+ context 'use another pka set' do
91
+ should 'work with all provided sets without raising' do
92
+ Bio::Sequence::PkaData::PKAS.keys.each do |key|
93
+ @sequence.calculate_iep(key, 3, 25)
94
+ end
95
+ end
64
96
  end
65
97
  end
66
98
  end
metadata CHANGED
@@ -1,22 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isoelectric_point
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
- - GeorgeR
13
+ - GeorgeG
14
14
  - pascalbetz
15
15
  autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-05 00:00:00 +02:00
19
+ date: 2010-10-07 00:00:00 +02:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -60,14 +60,14 @@ files:
60
60
  - LICENSE
61
61
  - README.rdoc
62
62
  - lib/isoelectric_point.rb
63
- - lib/isoelectric_point/data.rb
63
+ - lib/isoelectric_point/pka_data.rb
64
64
  - lib/isoelectric_point/extensions.rb
65
- - lib/isoelectric_point/sequence.rb
66
- - test/sequence_test.rb
65
+ - lib/isoelectric_point/aa.rb
66
+ - test/aa_test.rb
67
67
  - test/extensions_test.rb
68
68
  - test/test_helper.rb
69
69
  has_rdoc: true
70
- homepage: http://github.com/simplificator/isoelectric_point
70
+ homepage: http://github.com/GeorgeG/isoelectric_point_4_R
71
71
  licenses: []
72
72
 
73
73
  post_install_message:
@@ -99,8 +99,8 @@ rubyforge_project:
99
99
  rubygems_version: 1.3.7
100
100
  signing_key:
101
101
  specification_version: 3
102
- summary: Calculate isoelectric point. Based on code frmo GeorgeR. We just took it and made a gem of it.
102
+ summary: Calculate isoelectric point.
103
103
  test_files:
104
- - test/sequence_test.rb
104
+ - test/aa_test.rb
105
105
  - test/extensions_test.rb
106
106
  - test/test_helper.rb
@@ -1,56 +0,0 @@
1
- module IsoelectricPoint
2
- module Data
3
- PKAS = {
4
- 'dtaselect' => { "N_TERMINUS" => 8.0,
5
- "K" => 10.0,
6
- "R" => 12.0,
7
- "H" => 6.5,
8
- "C_TERMINUS" => 3.1,
9
- "D" => 4.4,
10
- "E" => 4.4,
11
- "C" => 8.5,
12
- "Y" => 10.0
13
- }.freeze,
14
- 'emboss' => { "N_TERMINUS" => 8.0,
15
- "K" => 10.0,
16
- "R" => 12.0,
17
- "H" => 6.5,
18
- "C_TERMINUS" => 3.1,
19
- "D" => 4.4,
20
- "E" => 4.4,
21
- "C" => 8.5,
22
- "Y" => 10.0
23
- }.freeze,
24
- 'rodwell' => { "N_TERMINUS" => 8.0,
25
- "K" => 11.5,
26
- "R" => 11.5,
27
- "H" => 6.0,
28
- "C_TERMINUS" => 3.1,
29
- "D" => 3.68,
30
- "E" => 4.25,
31
- "C" => 8.33,
32
- "Y" => 10.07
33
- }.freeze,
34
- 'wikipedia' => { "N_TERMINUS" => 8.2,
35
- "K" => 10.54,
36
- "R" => 12.48,
37
- "H" => 6.04,
38
- "C_TERMINUS" => 3.65,
39
- "D" => 3.9,
40
- "E" => 4.07,
41
- "C" => 8.18,
42
- "Y" => 10.47
43
- }.freeze,
44
- 'silerio' => { "N_TERMINUS" => 8.2,
45
- "K" => 10.4,
46
- "R" => 12.0,
47
- "H" => 6.4,
48
- "C_TERMINUS" => 3.2,
49
- "D" => 4.0,
50
- "E" => 4.5,
51
- "C" => 9.0,
52
- "Y" => 10.0
53
- }.freeze
54
- }.freeze
55
- end
56
- end
@@ -1,67 +0,0 @@
1
- #calculates the isoelectric point of a given protein sequence
2
- module IsoelectricPoint
3
- class Sequence
4
- CHARGED_GROUPS = %w{K R H D E C Y}
5
- KEYS_PLUS = ['K', 'R', 'H']
6
- KEYS_MINUS = ['D', 'E', 'C', 'Y']
7
-
8
- attr_accessor :value
9
- attr_reader :pks
10
-
11
- def initialize(sequence, pka_set_name = 'dtaselect')
12
- raise ArgumentError.new("pka_set_name is required") if pka_set_name.nil? || pka_set_name.strip == ''
13
- raise ArgumentError.new("sequence is required") if sequence.nil? || sequence.strip == ''
14
- @pks = Data::PKAS[pka_set_name]
15
- @value = sequence.upcase.gsub(/\s/, '')
16
- raise ArgumentError.new("pka_set '#{pka_set_name}' is unknown. Please specify one of #{Data::PKAS.keys.join(', ')}") unless self.pks
17
- end
18
-
19
- def calculate_iep(places = 2)
20
- precission = 15
21
- ph = 7.5
22
- step = 3.5
23
- target_charge = 0.0
24
- begin
25
- current_charge = calculate_charge_at(ph)
26
- if current_charge > 0
27
- ph += step
28
- else
29
- ph -= step
30
- end
31
- step /= 2.0
32
- #puts "#{self.value}: %.10f / %.10f / %.10f : direction was #{current_charge > 0 ? '+' : '-'}" % [current_charge, step, ph]
33
- #sleep 0.1
34
- end while current_charge == nil || target_charge.round_to_places(precission) != current_charge.round_to_places(precission)
35
- ph.round_to_places(places)
36
- end
37
-
38
-
39
- def calculate_charge_at(ph)
40
- KEYS_PLUS.inject(partial_charge(pks['N_TERMINUS'], ph)) do |memo, item|
41
- memo += partial_charge(self.pks[item], ph) * charged_residue_frequencies[item]
42
- end -
43
- KEYS_MINUS.inject(partial_charge(ph, pks['C_TERMINUS'])) do |memo, item|
44
- memo += partial_charge(ph, self.pks[item]) * charged_residue_frequencies[item]
45
- end
46
- end
47
-
48
- private
49
-
50
- def charged_residue_frequencies
51
- @charged_residue_frequency ||= calculate_charged_residue_frequencies
52
- end
53
-
54
- def partial_charge(a, b)
55
- x = 10 ** (a - b)
56
- x / (x + 1).to_f
57
- end
58
-
59
- def calculate_charged_residue_frequencies
60
- CHARGED_GROUPS.inject(Hash.new(0)) do |memo, item|
61
- memo[item] = self.value.count(item)
62
- memo
63
- end
64
- end
65
-
66
- end
67
- end