molecules 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +21 -0
- data/README +73 -0
- data/Rakefile +78 -0
- data/lib/molecules.rb +4 -0
- data/lib/molecules/calc.rb +127 -0
- data/lib/molecules/empirical_formula.rb +325 -0
- data/lib/molecules/libraries/polypeptide.rb +91 -0
- data/lib/molecules/libraries/residue.rb +165 -0
- data/lib/molecules/utils.rb +49 -0
- data/tap.yml +0 -0
- data/test/molecules/calc_test.rb +37 -0
- data/test/molecules/empirical_formula_class_test.rb +196 -0
- data/test/molecules/empirical_formula_test.rb +204 -0
- data/test/molecules/libraries/polypeptide_test.rb +128 -0
- data/test/molecules/libraries/residue_test.rb +289 -0
- data/test/molecules/utils_test.rb +147 -0
- data/test/molecules_test.rb +24 -0
- data/test/molecules_test_helper.rb +31 -0
- data/test/molecules_test_suite.rb +3 -0
- data/test/tap_test_helper.rb +3 -0
- metadata +82 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../molecules_test_helper.rb')
|
2
|
+
require 'molecules/empirical_formula'
|
3
|
+
|
4
|
+
class EmpiricalFormulaTest < Test::Unit::TestCase
|
5
|
+
include Molecules
|
6
|
+
|
7
|
+
#
|
8
|
+
# documentation test
|
9
|
+
#
|
10
|
+
|
11
|
+
# Mirrors the usage example for EmpiricalFormula: element lookup through
# ELEMENT_INDEX, construction from an array of counts, string form, mass
# and subtraction.
def test_documentation
  index = EmpiricalFormula::ELEMENT_INDEX
  assert_equal "Hydrogen", index[0].name
  assert_equal "Oxygen", index[1].name

  water = EmpiricalFormula.new [2,1]
  assert_equal 'H(2)O', water.to_s
  # The mass is a computed Float; compare within a tight tolerance instead
  # of requiring a bit-for-bit match with the decimal literal.
  assert_in_delta 18.0105646863, water.mass, 1e-9

  alanine = EmpiricalFormula.new [5,1,3,1]
  assert_equal [3,0,3,1], (alanine - water).formula
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# initialize test
|
25
|
+
#
|
26
|
+
|
27
|
+
# A formula built from an array of counts reports those same counts,
# whether the array is empty, all positive, or contains negatives.
def test_initialize
  [[], [2,1], [2,-1]].each do |counts|
    # Hand the constructor its own copy so the expectation stays an
    # independent array.
    built = EmpiricalFormula.new(counts.dup)
    assert_equal(counts, built.formula)
  end
end
|
37
|
+
|
38
|
+
# Trailing zero counts are dropped from the stored formula; a leading zero
# is kept. (The method name keeps its original spelling.)
def test_intialize_normalizes_formula_by_removing_trailing_zeros
  normalized = EmpiricalFormula.new([0,1,0]).formula
  assert_equal([0,1], normalized)
end
|
42
|
+
|
43
|
+
# nil counts are treated as zero, after which trailing zeros are dropped.
# (The method name keeps its original spelling.)
def test_intialize_normalizes_formula_by_converting_nils_to_zero
  normalized = EmpiricalFormula.new([nil,1,nil,0]).formula
  assert_equal([0,1], normalized)
end
|
47
|
+
|
48
|
+
# The constructor keeps the very array it was given (same object_id) and
# that array ends up frozen. (The method name keeps its original spelling.)
def test_intialize_freezes_formula
  counts = [1,2,3]
  instance = EmpiricalFormula.new(counts)

  assert instance.formula.frozen?
  assert_equal counts.object_id, instance.formula.object_id
end
|
55
|
+
|
56
|
+
#
|
57
|
+
# + test
|
58
|
+
#
|
59
|
+
|
60
|
+
# Addition sums counts position by position; the zero produced in the last
# position is not present in the resulting formula.
def test_PLUS
  left = EmpiricalFormula.new([1, 0, -1])
  right = EmpiricalFormula.new([1, 1, 1])

  total = left + right
  assert_equal([2,1], total.formula)
end
|
67
|
+
|
68
|
+
#
|
69
|
+
# - test
|
70
|
+
#
|
71
|
+
|
72
|
+
# Subtraction takes the difference of counts position by position.
def test_MINUS
  left = EmpiricalFormula.new([1, 0, -1])
  right = EmpiricalFormula.new([1, 1, 1])

  difference = left - right
  assert_equal([0, -1, -2], difference.formula)
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# * test
|
82
|
+
#
|
83
|
+
|
84
|
+
# Multiplying by an integer scales every count, and the result can be
# multiplied again.
def test_MULTIPLY
  base = EmpiricalFormula.new([1, 0, -1])

  scaled = (base * 2) * 3
  assert_equal([6, 0, -6], scaled.formula)
end
|
90
|
+
|
91
|
+
#
|
92
|
+
# == test
|
93
|
+
#
|
94
|
+
|
95
|
+
# Formulas compare equal when their counts match, including when one side
# was built with an extra trailing zero.
def test_EQUAL
  [[1], [1, 0]].each do |other_counts|
    assert EmpiricalFormula.new([1]) == EmpiricalFormula.new(other_counts)
  end
end
|
99
|
+
|
100
|
+
#
|
101
|
+
# each test
|
102
|
+
#
|
103
|
+
|
104
|
+
# each yields (element, count) pairs and leaves out positions whose count
# is zero.
def test_each_returns_elements_and_formula_for_non_zero_formula
  index = EmpiricalFormula::ELEMENT_INDEX
  seen = {}

  EmpiricalFormula.new([2,0,1]).each do |element, factor|
    seen[element] = factor
  end

  assert_equal({index[0] => 2, index[2] => 1}, seen)
end
|
111
|
+
|
112
|
+
#
|
113
|
+
# to_s test
|
114
|
+
#
|
115
|
+
|
116
|
+
# to_s writes each element symbol followed by its count in parentheses;
# a count of 1 is written as the bare symbol, negative counts are kept.
def test_to_s
  cases = [
    [[2,1],   "H(2)O"],
    [[-2,-1], "H(-2)O(-1)"]
  ]

  cases.each do |counts, expected|
    assert_equal expected, EmpiricalFormula.new(counts).to_s
  end
end
|
123
|
+
|
124
|
+
# One atom of each of the first three indexed elements prints as "CHO",
# i.e. the symbols appear in alphabetical order.
def test_to_s_symbols_are_sorted_alphabetically
  printed = EmpiricalFormula.new([1, 1, 1]).to_s
  assert_equal "CHO", printed
end
|
128
|
+
|
129
|
+
#
|
130
|
+
# mass test
|
131
|
+
#
|
132
|
+
|
133
|
+
# Mirrors the documentation example for EmpiricalFormula#mass: the default
# call, an explicit block that returns each element's mass, and a block
# that returns the standard atomic weight instead.
def test_mass_documentation
  water = EmpiricalFormula.new [2,1]
  # Masses are computed Floats; compare within a tolerance instead of
  # demanding exact equality with the decimal literals.
  tolerance = 1e-9

  assert_in_delta(18.0105646863, water.mass, tolerance)
  assert_in_delta(18.0105646863, water.mass {|e| e.mass }, tolerance)

  assert_in_delta(18.01528, water.mass {|e| e.std_atomic_weight.value }, tolerance)
end
|
141
|
+
|
142
|
+
# Without a block, mass returns the value this suite treats as the
# monoisotopic mass of water.
def test_mass_returns_monoisotopic_mass_if_no_block_is_given
  water = EmpiricalFormula.new [2,1]
  # Float result: compare within a tolerance rather than exactly.
  assert_in_delta 18.0105646863, water.mass, 1e-9
end
|
146
|
+
|
147
|
+
# With a block, mass is computed from whatever the block returns for each
# element (here the standard atomic weight).
def test_mass_calculates_mass_using_block_result
  water = EmpiricalFormula.new [2,1]
  # Float result: compare within a tolerance rather than exactly.
  assert_in_delta(18.01528, water.mass {|e| e.std_atomic_weight.value }, 1e-9)
end
|
151
|
+
|
152
|
+
# Small value wrapper used to check that mass works with block results
# that are not plain numbers. It supports + and * against either another
# AltMass or a bare operand and always returns a new AltMass.
class AltMass
  attr_reader :value

  def initialize(value)
    @value = value
  end

  # Sum of this value and +another+ (an AltMass or a bare operand).
  def +(another)
    AltMass.new(@value + unwrap(another))
  end

  # Product of this value and +another+ (an AltMass or a bare operand).
  def *(another)
    AltMass.new(@value * unwrap(another))
  end

  private

  # Returns the wrapped value of an AltMass, or the operand itself.
  def unwrap(operand)
    operand.kind_of?(AltMass) ? operand.value : operand
  end
end
|
169
|
+
|
170
|
+
# When the block returns AltMass objects, the mass calculation combines
# them through their own operators, so the result is itself an AltMass.
def test_mass_calculation_operates_on_block_result
  water = EmpiricalFormula.new [2,1]
  result = water.mass {|e| AltMass.new e.mass }

  assert result.kind_of?(AltMass)
  # The wrapped value is a computed Float; compare within a tolerance.
  assert_in_delta 18.0105646863, result.value, 1e-9
end
|
177
|
+
|
178
|
+
#
|
179
|
+
# benchmark
|
180
|
+
#
|
181
|
+
|
182
|
+
# Benchmarks +, - and * on small formulas. benchmark_test is not defined
# in this file (presumably it comes from the test helper); it yields the
# reporter used below.
def test_operation_speed
  benchmark_test(20) do |x|
    n = 10
    a = EmpiricalFormula.new [1,2,3,4]
    b = EmpiricalFormula.new [0,-1]
    iterations = n * 1000

    x.report("#{n}k +") { iterations.times { a + b } }
    x.report("#{n}k -") { iterations.times { a - b } }
    x.report("#{n}k *") { iterations.times { a * 3} }
  end
end
|
193
|
+
|
194
|
+
# Benchmarks mass with an explicit block on a four-element formula and on
# a formula with a single negative count. benchmark_test is not defined
# in this file (presumably it comes from the test helper).
def test_mass_speed
  benchmark_test(20) do |x|
    n = 10
    a = EmpiricalFormula.new [1,2,3,4]
    b = EmpiricalFormula.new [0,-1]
    iterations = n * 1000

    x.report("#{n}k [1,2,3,4] mass") do
      iterations.times { a.mass {|e| e.mass } }
    end
    x.report("#{n}k [0,-1] mass") do
      iterations.times { b.mass {|e| e.mass } }
    end
  end
end
|
204
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../molecules_test_helper.rb')
|
2
|
+
require 'molecules/libraries/polypeptide'
|
3
|
+
|
4
|
+
class PolypeptideTest < Test::Unit::TestCase
|
5
|
+
include Molecules::Libraries
|
6
|
+
|
7
|
+
#
|
8
|
+
# normalize test
|
9
|
+
#
|
10
|
+
|
11
|
+
# normalize strips newline, carriage return, tab and space characters and
# upcases what is left.
def test_normalize_removes_whitespace_and_upcases_sequence
  normalized = Polypeptide.normalize("Ab\n\rC\t\s")
  assert_equal "ABC", normalized
end
|
14
|
+
|
15
|
+
#
|
16
|
+
# initialize test
|
17
|
+
#
|
18
|
+
|
19
|
+
# An empty sequence gives an empty formula and composition; bradykinin
# gives the expected sequence, formula and per-residue counts.
def test_initialize
  empty = Polypeptide.new("")
  assert_equal "", empty.sequence
  assert_equal([],empty.formula)
  assert_equal({},empty.residue_composition)

  expected_composition = {
    Residue::R => 2,
    Residue::P => 3,
    Residue::G => 1,
    Residue::F => 2,
    Residue::S => 1}

  bradykinin = Polypeptide.new("RPPGFSPFR")
  assert_equal "RPPGFSPFR", bradykinin.sequence
  assert_equal [71, 10, 50, 15], bradykinin.formula
  assert_equal(expected_composition, bradykinin.residue_composition)
end
|
36
|
+
|
37
|
+
# Whitespace in the input is kept verbatim in sequence but does not
# contribute to the formula or the residue composition.
def test_spaces_are_allowed_in_initialize
  blank = Polypeptide.new("\s\t\r\n")
  assert_equal "\s\t\r\n", blank.sequence
  assert_equal([],blank.formula)
  assert_equal({},blank.residue_composition)

  expected_composition = {
    Residue::R => 2,
    Residue::P => 3,
    Residue::G => 1,
    Residue::F => 2,
    Residue::S => 1}

  bradykinin = Polypeptide.new(" R PP\t\nGFSP\s FR\r")
  assert_equal " R PP\t\nGFSP\s FR\r", bradykinin.sequence
  assert_equal [71, 10, 50, 15], bradykinin.formula
  assert_equal(expected_composition, bradykinin.residue_composition)
end
|
54
|
+
|
55
|
+
# 'Z' has no Residue entry, so building a polypeptide from it raises
# Polypeptide::UnknownResidueError.
def test_initialize_raises_error_for_unknown_residues
  unknown = 'Z'
  assert_nil Residue[unknown]
  assert_raise(Polypeptide::UnknownResidueError) do
    Polypeptide.new("Z")
  end
end
|
59
|
+
|
60
|
+
# 'A' is a known residue, but the lowercase letter is rejected with
# Polypeptide::UnknownResidueError.
def test_initialize_is_case_sensitive_for_residues
  known = 'A'
  assert_not_nil Residue[known]
  assert_raise(Polypeptide::UnknownResidueError) do
    Polypeptide.new("a")
  end
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# each_residue test
|
67
|
+
#
|
68
|
+
|
69
|
+
# each_residue yields the residues in sequence order and skips the
# whitespace embedded in the sequence.
def test_each_residue_returns_each_residue_sequentially
  collected = []
  peptide = Polypeptide.new("\sRP PG\t F")
  peptide.each_residue do |residue|
    collected << residue
  end

  expected = [Residue::R, Residue::P, Residue::P, Residue::G, Residue::F]
  assert_equal expected, collected
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# benchmark
|
79
|
+
#
|
80
|
+
|
81
|
+
# Benchmarks Polypeptide construction for sequences of 9, 90 and 9000
# residues. benchmark_test is not defined in this file (presumably it
# comes from the test helper).
def test_initialize_speed
  benchmark_test(20) do |x|
    n = 10
    thousands = n * 1000
    tens = n * 10

    x.report("#{n}k RPPGFSPFR") do
      thousands.times { Polypeptide.new("RPPGFSPFR") }
    end
    x.report("#{n}k RPPGFSPFR * 10") do
      thousands.times { Polypeptide.new("RPPGFSPFR" * 10) }
    end
    x.report("#{n*10} RPPGFSPFR * 1000") do
      tens.times { Polypeptide.new("RPPGFSPFR" * 1000) }
    end
  end
end
|
96
|
+
|
97
|
+
# Benchmarks each_residue against a plain each_byte pass over the same
# sequence. benchmark_test is not defined in this file (presumably it
# comes from the test helper).
def test_each_residue_speed
  benchmark_test(20) do |x|
    peptide = Polypeptide.new("RPPGFSPFR" * 10)

    x.report("1k RPPGFSPFR * 10") do
      1000.times do
        peptide.each_residue {|r| r}
      end
    end
    x.report("1k each_byte:") do
      1000.times do
        peptide.sequence.each_byte {|b| b}
      end
    end
  end
end
|
109
|
+
|
110
|
+
#def test_counting_vs_each_byte
|
111
|
+
# benchmark_test(20) do |x|
|
112
|
+
# sequence = "RPPGFSPFR" * 1000
|
113
|
+
#
|
114
|
+
# x.report("1k count") do
|
115
|
+
# 1000.times do
|
116
|
+
# Utils.count(sequence, Polypeptide::SEQUENCE_TOKENS)
|
117
|
+
# end
|
118
|
+
# end
|
119
|
+
#
|
120
|
+
# x.report("1k each_byte") do
|
121
|
+
# 1000.times do
|
122
|
+
# array = Array.new(100, 0)
|
123
|
+
# sequence.each_byte {|b| array[b] += 1}
|
124
|
+
# end
|
125
|
+
# end
|
126
|
+
# end
|
127
|
+
#end
|
128
|
+
end
|
@@ -0,0 +1,289 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../../molecules_test_helper.rb')
|
2
|
+
require 'molecules/libraries/residue'
|
3
|
+
|
4
|
+
class ResidueTest < Test::Unit::TestCase
|
5
|
+
include Molecules::Libraries
|
6
|
+
|
7
|
+
#
|
8
|
+
# documentation test
|
9
|
+
#
|
10
|
+
|
11
|
+
# Mirrors the documentation example for Residue: the alanine constant
# exposes its name, abbreviation, letter and side chain.
def test_documentation
  alanine = Residue::A
  assert_equal "Alanine", alanine.name
  assert_equal "Ala", alanine.abbr
  assert_equal "A", alanine.letter
  assert_equal "CH(3)", alanine.side_chain.to_s
end
|
18
|
+
|
19
|
+
# Residue.common holds exactly the twenty residues whose one-letter codes
# are listed below, in any order.
def test_common_returns_array_of_common_residues
  expected_letters = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'].sort

  assert_equal 20, Residue.common.length
  actual_letters = Residue.common.collect {|residue| residue.letter}.sort
  assert_equal expected_letters, actual_letters
end
|
23
|
+
|
24
|
+
# Residue[] finds a residue by letter, abbreviation or full name and
# returns nil for unknown keys; 'BACKBONE' is not a lookup key and there
# is no Residue.backbone method.
def test_class_lookup
  alanine = Residue::A

  ['A', 'Ala', 'Alanine'].each do |key|
    assert_equal alanine, Residue[key]
  end

  ['X', 'BACKBONE'].each do |key|
    assert_nil Residue[key]
  end
  assert_raise(NoMethodError) { Residue.backbone }
end
|
35
|
+
|
36
|
+
# For alanine, residue_mass returns the same value as mass.
def test_residue_mass_equals_mass_with_parameters
  alanine = Residue::A
  expected = alanine.mass
  assert_equal expected, alanine.residue_mass
end
|
40
|
+
|
41
|
+
# Checks Residue#mass for each one-letter code against a reference value.
# delta_mass is not defined in this file; presumably the test helper
# supplies the tolerance.
def test_mass_values
  expected_masses = {
    'A' => 71.03711,
    'R' => 156.10111,
    'N' => 114.04293,
    'D' => 115.02694,
    'C' => 103.00919,
    'E' => 129.04259,
    'Q' => 128.05858,
    'G' => 57.02146,
    'H' => 137.05891,
    'I' => 113.08406,
    'L' => 113.08406,
    'K' => 128.09496,
    'M' => 131.04049,
    'O' => 211.14465,
    'F' => 147.06841,
    'P' => 97.05276,
    'S' => 87.03203,
    'T' => 101.04768,
    'U' => 150.95363,
    'W' => 186.07931,
    'Y' => 163.06333,
    'V' => 99.06841
  }

  expected_masses.each_pair do |letter, reference|
    assert_in_delta reference, Residue[letter].mass, delta_mass
  end
end
|
69
|
+
|
70
|
+
# Checks Residue#immonium_ion_mass for each one-letter code against a
# reference value. delta_mass is not defined in this file; presumably the
# test helper supplies the tolerance.
def test_immonium_ion_mass
  expected_masses = {
    'A' => 44.05002,
    'R' => 129.11402,
    'N' => 87.05584,
    'D' => 88.03985,
    'C' => 76.02210,
    'E' => 102.05550,
    'Q' => 101.07149,
    'G' => 30.03437,
    'H' => 110.07182,
    'I' => 86.09697,
    'L' => 86.09697,
    'K' => 101.10787,
    'M' => 104.05340,
    'O' => 184.15756,
    'F' => 120.08132,
    'P' => 70.06567,
    'S' => 60.04494,
    'T' => 74.06059,
    'U' => 123.96654,
    'W' => 159.09222,
    'Y' => 136.07624,
    'V' => 72.08132
  }

  expected_masses.each_pair do |letter, reference|
    assert_in_delta reference, Residue[letter].immonium_ion_mass, delta_mass
  end
end
|
98
|
+
|
99
|
+
# vs the Proteome Commons Residue Reference, 2008-01-11
|
100
|
+
# http://www.proteomecommons.org/archive/1129086318745/docs/residue-reference.html
|
101
|
+
# Compares residue names and masses against an embedded reference table.
# Each row is "<name> <letter> <mass>"; the name may contain spaces.
# delta_mass is not defined in this file; presumably the test helper
# supplies the tolerance.
def test_mass_values_vs_proteome_commons
  table = %Q{
    Alanine A 71.0371137878
    Arginine R 156.1011110281
    Asparagine N 114.0429274472
    Aspartic Acid D 115.02694303199999
    Cysteine C 103.00918447779999
    Glutamic Acid E 129.0425930962
    Glutamine Q 128.05857751140002
    Glycine G 57.0214637236
    Histidine H 137.0589118624
    Isoleucine I 113.0840639804
    Leucine L 113.0840639804
    Lysine K 128.0949630177
    Methionine M 131.0404846062
    Phenylalanine F 147.0684139162
    Proline P 97.052763852
    Serine S 87.0320284099
    Threonine T 101.0476784741
    Tryptophan W 186.0793129535
    Tyrosine Y 163.0633285383
    Valine V 99.0684139162}

  row_format = /(.*)\s(\w)\s(\d+\.\d+)/

  table.split(/\n/).each do |raw_row|
    # Strip so the indentation of the table above never leaks into a row.
    residue_str = raw_row.strip
    next if residue_str.empty?

    # Fail with the offending row instead of a NoMethodError on nil when a
    # row does not have the expected shape.
    fields = row_format.match(residue_str)
    assert_not_nil fields, "unparseable reference row: #{residue_str}"

    name = fields[1].strip
    letter = fields[2]
    mass = fields[3].to_f

    residue = Residue[letter]
    assert_not_nil residue, residue_str
    assert_equal name, residue.name
    assert_in_delta mass, residue.mass, delta_mass, residue_str
  end
end
|
139
|
+
|
140
|
+
# vs the Mascot Amino Acid Reference Data, 2008-01-11
|
141
|
+
# http://hsc-mascot.uchsc.edu/mascot/help/aa_help.html
|
142
|
+
#
|
143
|
+
# minor formatting was done on this table to make it nice for the test;
|
144
|
+
# the formatting consisted of condensing residue names and formula
|
145
|
+
# to the same line and moving composites to a separate string. ex:
|
146
|
+
# Alanine
|
147
|
+
# C3H5NO Ala A 71.03712 71.08 Ala
|
148
|
+
# became
|
149
|
+
# Alanine C3H5NO Ala A 71.03712 71.08 Ala
|
150
|
+
#
|
151
|
+
# Note there are minor capitalization differences in the names and
|
152
|
+
# abbreviations relative to those in Residue
|
153
|
+
# Compares residue names, abbreviations, formulas and monoisotopic masses
# against an embedded reference table. Each row is
# "<name> <formula> <abbr> <letter> <monoisotopic> <average> <abbr>".
# Names and abbreviations are compared case-insensitively.
# delta_mass is not defined in this file; presumably the test helper
# supplies the tolerance.
def test_mass_values_vs_mascot
  table = %Q{
    Alanine C3H5NO Ala A 71.03712 71.08 Ala
    Arginine C6H12N4O Arg R 156.10112 156.19 Arg
    Asparagine C4H6N2O2 Asn N 114.04293 114.10 Asn
    Aspartic acid C4H5NO3 Asp D 115.02695 115.09 Asp
    Cysteine C3H5NOS Cys C 103.00919 103.14 Cys
    Glutamic acid C5H7NO3 Glu E 129.04260 129.12 Glu
    Glutamine C5H8N2O2 Gln Q 128.05858 128.13 Gln
    Glycine C2H3NO Gly G 57.02147 57.05 Gly
    Histidine C6H7N3O His H 137.05891 137.14 His
    Isoleucine C6H11NO Ile I 113.08407 113.16 Ile
    Leucine C6H11NO Leu L 113.08407 113.16 Leu
    Lysine C6H12N2O Lys K 128.09497 128.17 Lys
    Methionine C5H9NOS Met M 131.04049 131.19 Met
    Phenylalanine C9H9NO Phe F 147.06842 147.18 Phe
    Proline C5H7NO Pro P 97.05277 97.12 Pro
    Serine C3H5NO2 Ser S 87.03203 87.08 Ser
    Threonine C4H7NO2 Thr T 101.04768 101.10 Thr
    Selenocysteine C3H5NOSe SeC U 150.95364 150.03 SeC
    Tryptophan C11H10N2O Trp W 186.07932 186.21 Trp
    Tyrosine C9H9NO2 Tyr Y 163.06333 163.18 Tyr
    Valine C5H9NO Val V 99.06842 99.13 Val}

  # The reference table also lists composite codes, which have no single
  # formula and are not checked here:
  #   Asn or Asp  Asx  B
  #   Glu or Gln  Glx  Z
  #   Unknown     Xaa  X

  row_format = /(.*)\s+([\w\d]+)\s+(\w\w\w)\s+(\w)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+\w\w\w/

  table.split(/\n/).each do |raw_row|
    # Strip so the indentation of the table above never leaks into a row.
    residue_str = raw_row.strip
    next if residue_str.empty?

    # Fail with the offending row instead of a NoMethodError on nil when a
    # row does not have the expected shape.
    fields = row_format.match(residue_str)
    assert_not_nil fields, "unparseable reference row: #{residue_str}"

    # The greedy name capture can keep padding when columns are separated
    # by more than one whitespace character, hence the strip.
    name = fields[1].strip
    formula = fields[2]
    abbr = fields[3]
    letter = fields[4]
    monoisotopic = fields[5].to_f

    residue = Residue[letter]
    assert_not_nil residue, residue_str
    assert_equal name.upcase, residue.name.upcase, residue_str
    assert_equal abbr.upcase, residue.abbr.upcase, residue_str
    # Residue#to_s wraps counts in parentheses; the table does not.
    assert_equal formula, residue.to_s.gsub(/\(|\)/, ""), residue_str

    assert_in_delta monoisotopic, residue.mass, delta_mass, residue_str
    # TODO -- check average mass (capture 6 of the row)
  end
end
|
204
|
+
|
205
|
+
# vs the VG Analytical Organic Mass Spectrometry reference, reference date unknown (prior to 2005)
|
206
|
+
# the data from the data sheet was copied manually to doc/VG Analytical DataSheet.txt
|
207
|
+
# Compares residues against two embedded reference tables.
#
# Common residues are looked up by one-letter code; each row is
# "<abbr> <letter> <name> <formula> <monoisotopic> <average>".
# Uncommon residues have no letter and are looked up by abbreviation;
# each row is "<abbr> <name> <formula> <monoisotopic> <average>".
# delta_mass is not defined in this file; presumably the test helper
# supplies the tolerance.
def test_mass_values_vs_vg_analytical
  common = %Q{
    Ala A Alanine C3H5NO 71.03711 71.0788
    Arg R Arginine C6H12N4O 156.10111 156.1875
    Asn N Asparagine C4H6N2O2 114.04293 114.1038
    Asp D Aspartic Acid C4H5NO3 115.02694 115.0886
    Cys C Cysteine C3H5NOS 103.00919 103.1388
    Glu E Glutamic Acid C5H7NO3 129.04259 129.115
    Gln Q Glutamine C5H8N2O2 128.05858 128.1307
    Gly G Glycine C2H3NO 57.02146 57.0519
    His H Histidine C6H7N3O 137.05891 137.1411
    Ile I Isoleucine C6H11NO 113.08406 113.1594
    Leu L Leucine C6H11NO 113.08406 113.1594
    Lys K Lysine C6H12N2O 128.09496 128.1741
    Met M Methionine C5H9NOS 131.04049 131.1926
    Phe F Phenylalanine C9H9NO 147.06841 147.1766
    Pro P Proline C5H7NO 97.05276 97.1167
    Ser S Serine C3H5NO2 87.03203 87.0782
    Thr T Threonine C4H7NO2 101.04768 101.1051
    Trp W Tryptophan C11H10N2O 186.07931 186.2132
    Tyr Y Tyrosine C9H9NO2 163.06333 163.1760
    Val V Valine C5H9NO 99.06841 99.1326}

  # Captures: 1 abbr, 2 letter, 3 name, 4 optional " Acid" suffix,
  # 5 formula, 6 monoisotopic mass, 7 average mass.
  common_format = /(\w\w\w) (\w) (\w+( Acid)?) ([\w\d]+) (\d+\.\d+) (\d+\.\d+)/

  common.split(/\n/).each do |raw_row|
    # Strip so the indentation of the table above never leaks into a row.
    residue_str = raw_row.strip
    next if residue_str.empty?

    # Fail with the offending row instead of a NoMethodError on nil when a
    # row does not have the expected shape.
    fields = common_format.match(residue_str)
    assert_not_nil fields, "unparseable reference row: #{residue_str}"

    abbr = fields[1]
    letter = fields[2]
    name = fields[3]
    formula = fields[5]
    monoisotopic = fields[6].to_f

    residue = Residue[letter]
    assert_not_nil residue, residue_str
    assert_equal name, residue.name, residue_str
    assert_equal abbr, residue.abbr, residue_str
    # Residue#to_s wraps counts in parentheses; the table does not.
    assert_equal formula, residue.to_s.gsub(/\(|\)/, ""), residue_str

    assert_in_delta monoisotopic, residue.mass, delta_mass, residue_str
    # TODO -- check average mass (capture 7 of the row)
  end

  uncommon = %Q{
    Orn Ornithine C5H10N2O 114.07931 114.1472
    Aba Aminobutyric Acid C4H7NO 85.05276 85.1057
    AECys Aminoethylcysteine C5H10N2OS 146.05138 146.2072
    Aib alpha-Aminoisobutyric Acid C4H7NO 85.05276 85.1057
    CMCys Carboxymethylcysteine C5H7NO3S 161.01466 161.1755
    Dha Dehydroalanine C3H3NO 69.02146 69.0629
    Dhb Dehydroamino-alpha-butyric Acid C4H5NO 83.03711 83.0898
    Hyl Hydroxylysine C6H12N2O2 144.08988 144.1735
    Hyp Hydroxyproline C5H7NO2 113.04768 113.1161
    Iva Isovaline C5H9NO 99.06841 99.1326
    nLeu Norleucine C6H11NO 113.08406 113.1594
    Pip 2-Piperidinecarboxylic Acid C6H9NO 111.06841 111.1436
    pGlu Pyroglutamic Acid C5H5NO2 111.03203 111.1002
    Sar Sarcosine C3H5NO 71.03711 71.0788}

  # Captures: 1 abbr, 2 name, 3 optional " Acid" suffix, 4 formula,
  # 5 monoisotopic mass, 6 average mass.
  uncommon_format = /(\w+) ([\w-]+( Acid)?) ([\w\d]+) (\d+\.\d+) (\d+\.\d+)/

  uncommon.split(/\n/).each do |raw_row|
    residue_str = raw_row.strip
    next if residue_str.empty?

    fields = uncommon_format.match(residue_str)
    assert_not_nil fields, "unparseable reference row: #{residue_str}"

    abbr = fields[1]
    name = fields[2]
    formula = fields[4]
    monoisotopic = fields[5].to_f

    residue = Residue[abbr]
    assert_not_nil residue, residue_str
    assert_equal name, residue.name, residue_str
    assert_equal formula, residue.to_s.gsub(/\(|\)/, ""), residue_str

    assert_in_delta monoisotopic, residue.mass, delta_mass, residue_str
    # TODO -- check average mass (capture 6 of the row)
  end
end
|
288
|
+
|
289
|
+
end
|