molecules 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +21 -0
- data/README +73 -0
- data/Rakefile +78 -0
- data/lib/molecules.rb +4 -0
- data/lib/molecules/calc.rb +127 -0
- data/lib/molecules/empirical_formula.rb +325 -0
- data/lib/molecules/libraries/polypeptide.rb +91 -0
- data/lib/molecules/libraries/residue.rb +165 -0
- data/lib/molecules/utils.rb +49 -0
- data/tap.yml +0 -0
- data/test/molecules/calc_test.rb +37 -0
- data/test/molecules/empirical_formula_class_test.rb +196 -0
- data/test/molecules/empirical_formula_test.rb +204 -0
- data/test/molecules/libraries/polypeptide_test.rb +128 -0
- data/test/molecules/libraries/residue_test.rb +289 -0
- data/test/molecules/utils_test.rb +147 -0
- data/test/molecules_test.rb +24 -0
- data/test/molecules_test_helper.rb +31 -0
- data/test/molecules_test_suite.rb +3 -0
- data/test/tap_test_helper.rb +3 -0
- metadata +82 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'molecules/libraries/residue'
|
2
|
+
|
3
|
+
module Molecules
  module Libraries

    # Represents a polypeptide as a sequence of residues.  For convenience,
    # polypeptides may contain whitespace in their sequences (thus allowing
    # direct use with parsed FASTA-formatted peptide sequences).
    #
    # Currently polypeptide only handles sequences with common residues.
    # Note that the constructor does not call Polypeptide.normalize; the
    # sequence is counted exactly as given.
    class Polypeptide < EmpiricalFormula

      class << self
        # Normalizes the input sequence by removing whitespace and capitalizing.
        def normalize(sequence)
          sequence.gsub(/\s/, "").upcase
        end
      end

      # The sequence of self (including whitespace)
      attr_reader :sequence

      # A hash of (Residue, Integer) pairs defining the number of a given residue in self.
      attr_reader :residue_composition

      # The number of residues in self (may differ from sequence.length
      # if sequence contains whitespace).
      attr_reader :length

      # An array of tokens that may occur in a sequence, grouped
      # as patterns (ie one token for all whitespace characters, and
      # one token for each residue).  Used to count the number of
      # each type of residue in a sequence.
      SEQUENCE_TOKENS = ["\s\t\r\n"] + Residue.common.collect {|r| r.letter}

      # Initializes a new Polypeptide from sequence, tallying the residue
      # composition, the residue count and the formula of self.
      #
      # Raises an UnknownResidueError if sequence contains any character
      # that is neither one of the whitespace characters nor the letter of
      # a common residue.
      def initialize(sequence)
        @sequence = sequence

        @length = 0
        @residue_composition = {}
        @formula = Array.new(5, 0)

        # count up the number of whitespaces and residues in self;
        # the first count is whitespace, the rest line up one-to-one
        # with Residue.common (see SEQUENCE_TOKENS)
        tokens = Utils.count(sequence, SEQUENCE_TOKENS)
        whitespace = tokens.shift

        if whitespace == sequence.length
          # as per the Base specification, factors
          # should have no trailing zeros
          @formula.clear
          return
        end

        # add the residue masses and factors
        Residue.common.each do |residue|
          # benchmarks indicated that counting for each residue
          # is quicker than trying anything like:
          #
          #   sequence.each_byte {|b| bytes[b] += 1}
          #
          # This is particularly an issue for long sequences.  The
          # count operation could be optimized for isobaric residues
          n = tokens.shift
          next if n == 0

          @length += n
          @residue_composition[residue] = n
          Utils.add(@formula, residue.formula, n)
        end

        if @length + whitespace != sequence.length
          # raise an error if there are unaccounted characters
          raise UnknownResidueError, "unknown characters in sequence: #{sequence}"
        end
      end

      # Sequentially passes each residue in sequence to the block.  Bytes
      # with no entry in Residue.residue_index (whitespace, for instance)
      # are skipped.  Returns an Enumerator when no block is given.
      def each_residue
        # without this guard the yield below raises LocalJumpError
        return enum_for(:each_residue) unless block_given?

        residues = Residue.residue_index
        sequence.each_byte do |byte|
          residue = residues[byte]
          yield(residue) if residue
        end
      end

      # Raised when a sequence contains characters that cannot be
      # accounted for as whitespace or common residues.
      class UnknownResidueError < StandardError # :nodoc:
      end

    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'constants/library'
|
2
|
+
require 'molecules/empirical_formula'
|
3
|
+
|
4
|
+
module Molecules
  module Libraries

    # A library of amino acid residues.
    #
    #   r = Residue::A
    #   r.name                     # => "Alanine"
    #   r.abbr                     # => "Ala"
    #   r.letter                   # => "A"
    #   r.side_chain.to_s          # => "CH(3)"
    #
    class Residue < EmpiricalFormula

      class << self
        # The 20 common amino acids.
        def common
          collection(:common)
        end

        # An array of the residues indexed by the byte
        # corresponding to the residue letter.
        def residue_index
          collection(:residue_index)
        end

        # An array of the residue masses indexed by the byte
        # corresponding to the residue letter.
        def residue_mass_index
          collection(:residue_mass_index)
        end
      end

      # The full name of self
      attr_reader :name

      # The (typically) 3-letter abbreviation of self
      attr_reader :abbr

      # The letter code for self (nil for residues without one)
      attr_reader :letter

      # The byte corresponding to letter (nil if letter is nil)
      attr_reader :byte

      # An EmpiricalFormula representing the side chain of self
      attr_reader :side_chain

      # A symbol classification of self; the value passed as the
      # classification argument of new (ex :common, :standard, :uncommon)
      attr_reader :type

      # The unrounded monoisotopic side chain mass of self
      attr_reader :side_chain_mass

      # The uncharged, unrounded, monoisotopic residue mass of self
      # (the backbone plus side chain mass, with no N- or C-terminus)
      attr_reader :residue_mass

      # The unrounded mass of the immonium ion of self
      # (residue_mass + DELTA_IMMONIUM.mass)
      attr_reader :immonium_ion_mass

      # Initializes a new Residue.  The formula of self is the formula of
      # the side chain (parsed from side_chain_formula using
      # EmpiricalFormula.parse_simple) plus the BACKBONE formula.
      #
      # letter::             the one-letter code, or nil
      # abbr::               the abbreviation (ex "Ala")
      # name::               the full name (ex "Alanine")
      # side_chain_formula:: a simple formula string (ex "CH(3)")
      # classification::     a symbol classification, or nil
      def initialize(letter, abbr, name, side_chain_formula, classification=nil)
        @side_chain = EmpiricalFormula.parse_simple(side_chain_formula)
        # dup so that adding the backbone does not modify the side chain formula
        super( Utils.add(side_chain.formula.dup, BACKBONE.formula), false)

        @letter = letter
        @abbr = abbr
        @name = name
        @classification = classification
        # the type reader and composite? both read @type, which was
        # previously never assigned (so type was always nil)
        @type = classification

        @side_chain_mass = side_chain.mass
        @residue_mass = mass
        @immonium_ion_mass = @residue_mass + DELTA_IMMONIUM.mass

        # first byte of the letter; unpack yields an Integer
        # regardless of how String#[] behaves
        @byte = @letter.nil? ? nil : @letter.unpack('C').first
      end

      # True if the residue is of type :common
      def common?
        @classification == :common
      end

      # True if the residue is type :common or :standard.
      def standard?
        @classification == :common || @classification == :standard
      end

      # True if the residue is a composite representing a set of isobaric residues
      def composite?
        @type == :composite
      end

      # An EmpiricalFormula for the residue backbone
      BACKBONE = EmpiricalFormula.parse_simple('C(2)H(2)NO')

      # Add to a Residue to achieve an immonium ion
      DELTA_IMMONIUM = EmpiricalFormula.parse('-CO+H')

      A = Residue.new('A', "Ala", "Alanine", "CH(3)", :common)
      C = Residue.new('C', "Cys", "Cysteine", "CH(3)S", :common)
      D = Residue.new('D', "Asp", "Aspartic Acid", "C(2)H(3)O(2)", :common)
      E = Residue.new('E', "Glu", "Glutamic Acid", "C(3)H(5)O(2)", :common)
      F = Residue.new('F', "Phe", "Phenylalanine", "C(7)H(7)", :common)
      G = Residue.new('G', "Gly", "Glycine", "H", :common)
      H = Residue.new('H', "His", "Histidine", "C(4)H(5)N(2)", :common)
      I = Residue.new('I', "Ile", "Isoleucine", "C(4)H(9)", :common)
      K = Residue.new('K', "Lys", "Lysine", "C(4)H(10)N", :common)
      L = Residue.new('L', "Leu", "Leucine", "C(4)H(9)", :common)
      M = Residue.new('M', "Met", "Methionine", "C(3)H(7)S", :common)
      N = Residue.new('N', "Asn", "Asparagine", "C(2)H(4)NO", :common)
      # NOTE(review): pyrrolysine is usually given as C(12)H(21)N(3)O(3),
      # which would imply a side chain of C(10)H(17)N(2)O -- confirm this entry
      O = Residue.new('O', "Pyl", "Pyrrolysine", "C(9)H(17)NO", :standard)
      P = Residue.new('P', "Pro", "Proline", "C(3)H(5)", :common)
      Q = Residue.new('Q', "Gln", "Glutamine", "C(3)H(6)NO", :common)
      R = Residue.new('R', "Arg", "Arginine", "C(4)H(10)N(3)", :common)
      S = Residue.new('S', "Ser", "Serine", "CH(3)O", :common)
      T = Residue.new('T', "Thr", "Threonine", "C(2)H(5)O", :common)
      U = Residue.new('U', "Sec", "Selenocysteine", "CH(3)Se", :standard)
      V = Residue.new('V', "Val", "Valine", "C(3)H(7)", :common)
      W = Residue.new('W', "Trp", "Tryptophan", "C(9)H(8)N", :common)
      Y = Residue.new('Y', "Tyr", "Tyrosine", "C(7)H(7)O", :common)

      # Residues without a one-letter code
      ORN = Residue.new(nil, "Orn", "Ornithine", "C(3)H(8)N", :uncommon)
      ABA = Residue.new(nil, 'Aba', 'Aminobutyric Acid', 'C(2)H(5)', :uncommon)
      AECYS = Residue.new(nil, 'AECys','Aminoethylcysteine', 'C(3)H(8)NS', :uncommon)
      AIB = Residue.new(nil, 'Aib', 'alpha-Aminoisobutyric Acid', 'C(2)H(5)', :uncommon)
      CMCYS = Residue.new(nil, 'CMCys','Carboxymethylcysteine', 'C(3)H(5)O(2)S', :uncommon)
      DHA = Residue.new(nil, 'Dha', 'Dehydroalanine', 'CH', :uncommon)
      DHB = Residue.new(nil, 'Dhb', 'Dehydroamino-alpha-butyric Acid', 'C(2)H(3)', :uncommon)
      HYL = Residue.new(nil, 'Hyl', 'Hydroxylysine', 'C(4)H(10)NO', :uncommon)
      HYP = Residue.new(nil, 'Hyp', 'Hydroxyproline', 'C(3)H(5)O', :uncommon)
      IVA = Residue.new(nil, 'Iva', 'Isovaline', 'C(3)H(7)', :uncommon)
      NLEU = Residue.new(nil, 'nLeu', 'Norleucine', 'C(4)H(9)', :uncommon)
      PIP = Residue.new(nil, 'Pip', '2-Piperidinecarboxylic Acid', 'C(4)H(7)', :uncommon)
      PGLU = Residue.new(nil, 'pGlu', 'Pyroglutamic Acid', 'C(3)H(3)O', :uncommon)
      SAR = Residue.new(nil, 'Sar', 'Sarcosine', 'CH(3)', :uncommon)

      include Constants::Library

      library.index_by_attribute :letter
      library.index_by_attribute :abbr
      library.index_by_attribute :name

      # the common residues (backs Residue.common)
      library.collect(:common) do |residue|
        residue.common? ? residue : nil
      end

      # common residues keyed by letter byte (backs Residue.residue_index)
      library.collect(:residue_index) do |residue|
        next unless residue.common?
        [residue, residue.byte]
      end

      # common residue masses keyed by letter byte (backs Residue.residue_mass_index)
      library.collect(:residue_mass_index) do |residue|
        next unless residue.common?
        [residue.residue_mass, residue.byte]
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Molecules

  # A number of utility routines used by EmpiricalFormula and elsewhere.
  # These methods are used a great deal and are all prime candidates for
  # optimization (for example using RubyInline).
  module Utils
    module_function

    # Rounds n to the given precision (the number of decimal places),
    # returning a Float.
    def round(n, precision)
      scale = 10**precision.to_i
      scaled = (n * scale).round
      scaled.to_f / scale
    end

    # Adds n times each element of b to the element of a at the same
    # index.  The arrays may differ in length; a is padded with zeros
    # as needed.  Modifies and returns a, with trailing zeros removed.
    def add(a, b, n=1)
      # pad a out to the length of b
      (b.length - a.length).times { a << 0 }

      b.each_with_index do |factor, index|
        a[index] += n * factor
      end

      # strip trailing zeros
      a.pop while a.last == 0
      a
    end

    # Multiplies the elements of array a by factor in place, returning a.
    # Clears a if factor == 0.
    def multiply(a, factor)
      if factor == 0
        a.clear
      else
        a.map! {|element| element * factor}
      end
    end

    # Collects the number of each of the patterns in str, where each
    # pattern is treated as a set of characters.  For example:
    #
    #   count("abcabca", ["a", "b", "c"])   # => [3, 2, 2]
    #   count("abcabca", ["a", "bc"])       # => [3, 4]
    #
    def count(str, patterns)
      patterns.map {|characters| str.count(characters)}
    end
  end

end
|
data/tap.yml
ADDED
File without changes
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../tap_test_helper.rb')
|
2
|
+
require 'molecules/calc'
|
3
|
+
|
4
|
+
class Molecules::CalcTest < Test::Unit::TestCase
  acts_as_tap_test

  # The Molecules::Calc instance under test, rebuilt by setup for each test.
  attr_reader :t

  def setup
    super
    @t = Molecules::Calc.new
  end

  # A single formula enqueued with the default settings.
  def test_mass_calculation
    t.enq("H2O")
    app.run

    water = Unit.new(18.0105646863, "Da")
    assert_equal [[water]], app.results(t)
  end

  # Two formulae enqueued together with precision set to 2.
  def test_mass_calculation_with_precision
    t.precision = 2
    t.enq("H2O", "NH3 + H2O")
    app.run

    masses = [
      Unit.new(18.01, "Da"),
      Unit.new(35.04, "Da")
    ]
    assert_equal [masses], app.results(t)
  end

  # Units set to "yg" and precision set to 3.
  def test_mass_calculation_with_precision_and_unit_conversion
    t.units = "yg"
    t.precision = 3
    t.enq("H2O")
    app.run

    water = Unit.new(29.907, "yg")
    assert_equal [[water]], app.results(t)
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '../molecules_test_helper.rb')
|
2
|
+
require 'molecules/empirical_formula'
|
3
|
+
|
4
|
+
class EmpiricalFormulaClassTest < Test::Unit::TestCase
  include Molecules

  #
  # parse_simple test
  #

  # Each of the documented inputs should print as water.
  def test_parse_simple_documentation
    ["H(2)O", "H (2) O", "HO(-1)O(2)H"].each do |input|
      assert_equal "H(2)O", EmpiricalFormula.parse_simple(input).to_s
    end
  end

  # Repeated elements are summed and whitespace is ignored.
  def test_parse_simple
    ["HO(-1)O(2)H", "H O (-1 )O( 2) H "].each do |input|
      assert_equal([2,1], EmpiricalFormula.parse_simple(input).formula)
    end
  end

  def test_parse_simple_fails_for_malformed_formulae
    malformed = [
      # numbers outside parenthesis
      "H2",
      # empty parenthesis
      "H()",
      # mismatched parenthesis
      "H(",
      ")H",
      # anything complex
      "H + O"
    ]

    malformed.each do |input|
      assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse_simple(input) }
    end
  end

  #
  # test class parse
  #

  def test_parse_documentation
    assert_equal "H(2)O", EmpiricalFormula.parse("H2O").to_s
    assert_equal "C(52)H(106)", EmpiricalFormula.parse("CH3(CH2)50CH3").to_s
    assert_equal "C(2)H(4)N(2)", EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s

    # builds a formula from a bracketed list of factors and
    # returns nil for anything else
    from_factors = lambda do |input|
      case input
      when /\[(.*)\]/
        listed = Regexp.last_match(1)
        EmpiricalFormula.new(listed.split(/,/).map {|factor| factor.strip.to_i })
      else
        nil
      end
    end

    assert_equal "H(4)O(2)", EmpiricalFormula.parse("H2O + [2, 1]", &from_factors).to_s
    assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse("H2O + :not_expected", &from_factors) }
  end

  # Maps each input formula to the string expected from to_s.
  def test_parse
    expectations = {
      nil => "",
      "" => "",
      "H" => "H",
      "HO" => "HO",
      "HFe" => "FeH",
      "FeH" => "FeH",
      "OH2" => "H(2)O",
      "H2O" => "H(2)O",
      "C6H12O4" => "C(6)H(12)O(4)",
      "Fe2OMg3" => "Fe(2)Mg(3)O",
      "(H)2" => "H(2)",
      "(OH)2" => "H(2)O(2)",
      "(HFe)" => "FeH",
      "(FeH)" => "FeH",
      "(OH2)2" => "H(4)O(2)",
      "(H2O)2" => "H(4)O(2)",
      "(C6H12O4)2" => "C(12)H(24)O(8)",
      "(Fe2OMg3)2" => "Fe(4)Mg(6)O(2)",
      "C6H12O4(C6H12O4)2C6H12O4" => "C(24)H(48)O(16)",
      "Fe2OMg3(Fe2OMg3(Fe2OMg3))Fe2OMg3" => "Fe(8)Mg(12)O(4)",
      "Fe2OMg3(Fe2OMg3)(Fe2OMg3)Fe2OMg3" => "Fe(8)Mg(12)O(4)",
      "Fe2OMg3(Fe2OMg3(Fe2OMg3)3((C)6H12O4)2)2C" => "C(25)Fe(18)H(48)Mg(27)O(25)",
      " (H2O) 10 0 " => "H(200)O(100)",
      "CH3(CH2)7CH" => "C(9)H(18)",
      "H3NCHCO2" => "C(2)H(4)NO(2)",
      "(CH3)2CuLi" => "C(2)CuH(6)Li",

      # multipart
      "-H" => "H(-1)",
      "H2O-H" => "HO",
      "H2O - (OH)2+ H2O2-H2O" => ""
    }

    expectations.each_pair do |input, expected|
      parsed = EmpiricalFormula.parse(input)
      assert_equal expected, parsed.to_s, input
    end
  end

  def test_parse_fails_for_malformed_formulae
    malformed = [
      # mismatched parenthesis
      "H)2",
      "(H2",
      "(O2(H2)",
      "(O)2H2)",
      # hanging factors
      "2C",
      #"(2)",
      "(2)2",
      "(2C)",
      "(2C)2",
      "C(2C)",
      # empty parenthesis
      "()",
      "()2"
    ]

    malformed.each do |input|
      assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse(input) }
    end
  end

  #
  # class mass test
  #

  # Not picked up by the test runner (name does not start with test_).
  def break_test_class_mass_method
    hydrogen = EmpiricalFormula::Element::H
    oxygen = EmpiricalFormula::Element::O
    water_mass = hydrogen.mass * 2 + oxygen.mass
    assert_equal 18.010565, water_mass

    assert_equal 18.010565, EmpiricalFormula.mass("H2O")
    assert_equal 18.010565, EmpiricalFormula.mass("H + OH")
    assert_equal 18, EmpiricalFormula.mass("H2O", 0)
  end

  #
  # library molecules
  #

  # Not picked up by the test runner (name does not start with test_).
  def break_test_access_library_molecules
    water = EmpiricalFormula::H2O

    assert_equal water, EmpiricalFormula.lookup('h2o')
    assert_equal water, EmpiricalFormula.h2o
    assert_equal 18.010565, EmpiricalFormula.h2o.mass
  end

  # vs the VG Analytical Organic Mass Spectrometry reference, reference date unknown (prior to 2005)
  # the data from the data sheet was copied manually to doc/VG Analytical DataSheet.txt
  def test_molecule_mass_values_vs_vg_analytical
    # columns: formula, monoisotopic mass, average mass; the rows must
    # stay at the start of the line since each is split on whitespace
    table = %Q{
NH2 16.01872 16.0226
OH 17.00274 17.0073
OCH3 31.01839 31.0342
CH3CO 43.01839 43.0452}

    table.split(/\n/).each do |row|
      next if row.empty?

      name, monoisotopic, average = row.split(/\s/)
      monoisotopic = monoisotopic.to_f
      average = average.to_f

      molecule = EmpiricalFormula.parse(name)
      assert_in_delta monoisotopic, molecule.mass, delta_mass, row
      # TODO -- check average mass
    end
  end

  #
  # benchmark
  #

  def test_parse_speed
    benchmark_test(20) do |x|
      n = 10
      formulae = ["H20","H2(H2(H2))H2"]

      formulae.each do |formula|
        x.report("#{n}k #{formula}") do
          (n*1000).times { EmpiricalFormula.parse(formula) }
        end
      end
    end
  end

  def test_parse_simple_speed
    benchmark_test(20) do |x|
      n = 10
      formulae = ["H(20)","H(2)H(2)H(2)H(2)"]

      formulae.each do |formula|
        x.report("#{n}k #{formula}") do
          (n*1000).times { EmpiricalFormula.parse_simple(formula) }
        end
      end
    end
  end
end
|