molecules 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +21 -0
- data/README +73 -0
- data/Rakefile +78 -0
- data/lib/molecules.rb +4 -0
- data/lib/molecules/calc.rb +127 -0
- data/lib/molecules/empirical_formula.rb +325 -0
- data/lib/molecules/libraries/polypeptide.rb +91 -0
- data/lib/molecules/libraries/residue.rb +165 -0
- data/lib/molecules/utils.rb +49 -0
- data/tap.yml +0 -0
- data/test/molecules/calc_test.rb +37 -0
- data/test/molecules/empirical_formula_class_test.rb +196 -0
- data/test/molecules/empirical_formula_test.rb +204 -0
- data/test/molecules/libraries/polypeptide_test.rb +128 -0
- data/test/molecules/libraries/residue_test.rb +289 -0
- data/test/molecules/utils_test.rb +147 -0
- data/test/molecules_test.rb +24 -0
- data/test/molecules_test_helper.rb +31 -0
- data/test/molecules_test_suite.rb +3 -0
- data/test/tap_test_helper.rb +3 -0
- metadata +82 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2006-2008, Regents of the University of Colorado.
|
2
|
+
Developer:: Simon Chiang, Biomolecular Structure Program
|
3
|
+
Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
6
|
+
software and associated documentation files (the "Software"), to deal in the Software
|
7
|
+
without restriction, including without limitation the rights to use, copy, modify, merge,
|
8
|
+
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
9
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be included in all copies or
|
12
|
+
substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
18
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
19
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
20
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
21
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
= Molecules
|
2
|
+
|
3
|
+
A library of molecules for scientific calculations in Ruby.
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Molecules provides libraries of commonly used molecules (currently just amino
|
8
|
+
acid residues and polypeptides). Library classes inherit from EmpiricalFormula
|
9
|
+
which allows calculation of molecular composition and mass, as well as
|
10
|
+
addition/subtraction of other molecules.
|
11
|
+
|
12
|
+
I have attempted to use reputable sources and to adhere to standards when
|
13
|
+
applicable. Please notify me of any errors and send me suggestions!
|
14
|
+
|
15
|
+
* Rubyforge[http://rubyforge.org/projects/bioactive]
|
16
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/13518-molecules/overview]
|
17
|
+
* Github[http://github.com/bahuvrihi/molecules/tree/master]
|
18
|
+
|
19
|
+
== Usage
|
20
|
+
|
21
|
+
require 'molecules'
|
22
|
+
include Molecules::Libraries
|
23
|
+
|
24
|
+
# Residue predefines all common amino acids
|
25
|
+
# as well as some uncommon ones.
|
26
|
+
r = Residue::A
|
27
|
+
r.name # => "Alanine"
|
28
|
+
r.abbr # => "Ala"
|
29
|
+
r.letter # => "A"
|
30
|
+
r.side_chain.to_s # => "CH(3)"
|
31
|
+
r.mass # => 71.03711
|
32
|
+
r.immonium_ion_mass # => 44.0500
|
33
|
+
|
34
|
+
# Polypeptide allows for creation of polypeptides
|
35
|
+
# from residue sequences.
|
36
|
+
p = Polypeptide.new("RPPGFSPFR")
|
37
|
+
p.to_s # => "C(50)H(71)N(15)O(10)"
|
38
|
+
p.mass # => 1041.5508
|
39
|
+
|
40
|
+
# Generic molecules may be specified with EmpiricalFormula.
|
41
|
+
caffeine = Molecules::EmpiricalFormula.parse("C8H10N4O2")
|
42
|
+
coffee = Molecules::EmpiricalFormula.parse("C8H10N4O2 + H2O")
|
43
|
+
|
44
|
+
=== Mass Calculator (tap task)
|
45
|
+
|
46
|
+
Molecules provides a mass calculator tap task. Tap[http://tap.rubyforge.org]
|
47
|
+
is not required by molecules in general, but you get this bonus if you have tap
|
48
|
+
installed:
|
49
|
+
|
50
|
+
% tap -- molecules/calc ":RPPGFSPFR + H2O"
|
51
|
+
I[15:34:30] 1077.57 Da :RPPGFSPFR + H2O
|
52
|
+
|
53
|
+
== Known Issues
|
54
|
+
|
55
|
+
* Polypeptide only allows common residues
|
56
|
+
* No 'Molecule' class is defined, pending the potential addition
|
57
|
+
of more molecule data (ex: SMILES[http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification]
|
58
|
+
data)
|
59
|
+
* No mechanism for defining large libraries of molecules has been
|
60
|
+
chosen. A database solution may be adopted to this end.
|
61
|
+
|
62
|
+
== Installation
|
63
|
+
|
64
|
+
Molecules is available as a gem through RubyForge[http://rubyforge.org/projects/bioactive]. Use:
|
65
|
+
|
66
|
+
% gem install molecules
|
67
|
+
|
68
|
+
== Info
|
69
|
+
|
70
|
+
Copyright (c) 2006-2008, Regents of the University of Colorado.
|
71
|
+
Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
|
72
|
+
Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
73
|
+
Licence:: MIT-Style
|
data/Rakefile
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
# tasks
|
8
|
+
desc 'Default: Run tests.'
|
9
|
+
task :default => :test
|
10
|
+
|
11
|
+
desc 'Run tests.'
|
12
|
+
Rake::TestTask.new(:test) do |t|
|
13
|
+
t.libs << 'lib'
|
14
|
+
t.pattern = File.join('test', ENV['subset'] || '', ENV['pattern'] || '**/*_test.rb')
|
15
|
+
t.verbose = true
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# admin tasks
|
20
|
+
#
|
21
|
+
|
22
|
+
# Loads and returns the Gem::Specification defined in molecules.gemspec,
# resolved relative to the current working directory.
#
# The file is loaded through RubyGems rather than evaluated under a raised
# $SAFE level, because the $SAFE taint levels are no longer supported by
# current Ruby versions.
#
# Raises a RuntimeError if the file is missing or does not evaluate to a
# Gem::Specification (Gem::Specification.load returns nil in both cases).
def gemspec
  path = "molecules.gemspec"
  spec = Gem::Specification.load(path)
  raise "could not load gemspec: #{path}" if spec.nil?
  spec
end
|
28
|
+
|
29
|
+
Rake::GemPackageTask.new(gemspec) do |pkg|
|
30
|
+
pkg.need_tar = true
|
31
|
+
end
|
32
|
+
|
33
|
+
# Prints one line per project file in the form "<label> : <path>", sorted
# by path.  Files listed in the gemspec are labeled true/false according to
# whether they exist on disk; files found on disk but not listed in the
# gemspec get an empty label.
task :print_manifest do
  # collect files from the gemspec, keyed by absolute path and labeled
  # with true or false corresponding to the file existing or not
  manifest = gemspec.files.inject({}) do |entries, file|
    entries[File.expand_path(file)] = [File.exist?(file), file]
    entries
  end

  # gather non-rdoc/pkg files for the project and add them to the
  # manifest if they are not included already (marked by an empty label)
  Dir.glob("**/*").each do |file|
    next if file =~ /^(rdoc|pkg)/ || File.directory?(file)

    path = File.expand_path(file)
    manifest[path] = ["", file] unless manifest.key?(path)
  end

  # sort by relative path and output the results
  manifest.values.sort_by {|_label, file| file }.each do |entry|
    puts "%-5s : %s" % entry
  end
end
|
58
|
+
|
59
|
+
desc 'Generate documentation.'
|
60
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
61
|
+
rdoc.rdoc_dir = 'rdoc'
|
62
|
+
rdoc.title = "molecules"
|
63
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
64
|
+
rdoc.rdoc_files.include(["README", 'MIT-LICENSE'])
|
65
|
+
rdoc.rdoc_files.include(gemspec.files.select {|file| file =~ /^lib/})
|
66
|
+
end
|
67
|
+
|
68
|
+
desc "Publish RDoc to RubyForge"
|
69
|
+
task :publish_rdoc => [:rdoc] do
|
70
|
+
config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
|
71
|
+
host = "#{config["username"]}@rubyforge.org"
|
72
|
+
|
73
|
+
rsync_args = "-v -c -r"
|
74
|
+
remote_dir = "/var/www/gforge-projects/bioactive/molecules"
|
75
|
+
local_dir = "rdoc"
|
76
|
+
|
77
|
+
sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
78
|
+
end
|
data/lib/molecules.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'molecules/empirical_formula'
|
2
|
+
require 'molecules/libraries/polypeptide'
|
3
|
+
|
4
|
+
# patch for ruby units
|
5
|
+
class Unit < Numeric # :nodoc:
|
6
|
+
UNIT_DEFINITIONS['<AMU>'] = [%w{u AMU amu}, 1/6.0221415e26, :mass, %w{<kilogram>}]
|
7
|
+
UNIT_DEFINITIONS['<dalton>'] = [%w{Da Dalton Daltons dalton daltons}, 1/6.0221415e26, :mass, %w{<kilogram>}]
|
8
|
+
end
|
9
|
+
Unit.setup
|
10
|
+
|
11
|
+
module Molecules
|
12
|
+
|
13
|
+
# :startdoc::manifest a mass calculator
|
14
|
+
# Calculates the mass of a molecule or empirical formula. The
|
15
|
+
# options can be used to alter the output (precision, mass
|
16
|
+
# calculation method etc.) You may enter compound formulae, or
|
17
|
+
# a list of formulae. In addition, polypeptides can be specified
|
18
|
+
# using the one-letter residue codes:
|
19
|
+
#
|
20
|
+
# % tap -- molecules/calc H2O
|
21
|
+
# I[17:09:00] 18.0105646863 Da H2O
|
22
|
+
#
|
23
|
+
# % tap -- molecules/calc H2O -u kg
|
24
|
+
# I[13:35:59] 2.99072e-026 kg H2O
|
25
|
+
#
|
26
|
+
# % tap -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
|
27
|
+
# I[17:08:21] 89.05 Da C3H5NO + H2O
|
28
|
+
# I[17:08:21] 1059.56 Da C50H73N15O11
|
29
|
+
#
|
30
|
+
# % tap -- molecules/calc :RPPGFSPFR
|
31
|
+
# I[13:35:02] 1059.56 Da :RPPGFSPFR
|
32
|
+
#
|
33
|
+
# Furthermore, if a unimod path is specified in the configurations,
|
34
|
+
# unimod modifications may be specified by name as the polypeptide
|
35
|
+
# termini. Use '%' signs as in a SQL query to shorten the name:
|
36
|
+
#
|
37
|
+
# % tap -- molecules/calc 'Acetyl:RPPGFSPFR:Hydroxyl%' --unimod-path <...>
|
38
|
+
# I[13:33:25] 1059.56 Da Acetyl:RPPGFSPFR:Hydroxyl%
|
39
|
+
#
|
40
|
+
# The unimod path must point to an sqlite3 ActiveUnimod database, and
|
41
|
+
# sqlite3-ruby must be installed for this feature to work.
|
42
|
+
#
|
43
|
+
# * ActiveUnimod[http://bioactive.rubyforge.org/]
|
44
|
+
# * sqlite3-ruby[http://rubyforge.org/projects/sqlite-ruby/]
|
45
|
+
#
|
46
|
+
class Calc < Tap::Task
|
47
|
+
|
48
|
+
config :type, :monoisotopic # the mass type calculated
|
49
|
+
config :precision, nil, :short => 'p' # the precision of the mass
|
50
|
+
config :units, "Da", :short => 'u', &c.string # the mass unit reported
|
51
|
+
config :composition, false, :short => 'c', &c.flag # reports the composition, not the formula
|
52
|
+
config :unimod_path, nil do |path|                  # the path to the unimod database
  # nil is accepted (no database configured); any other value must name an
  # existing file, otherwise configuration fails immediately.
  case
  when path == nil then nil
  when File.exist?(path) then path
  else raise "path to unimod db does not exist: #{path}"
  end
end
|
59
|
+
|
60
|
+
# Formulates a query for a modification matching code_name
# for the unimod database.  If the code_name contains a '%'
# then the query will use a LIKE syntax, otherwise the
# code_name will be searched for exactly.
#
# Single quotes in code_name are doubled so that the name is always
# treated as one SQL string literal and cannot terminate it early.
#
# Returns the SQL query string.
def mod_query(code_name)
  operator = code_name.include?('%') ? 'LIKE' : '='
  escaped = code_name.gsub("'", "''")
  "SELECT code_name, composition FROM modifications WHERE code_name #{operator} '#{escaped}'"
end
|
68
|
+
|
69
|
+
# Attempts to find and instantiate an EmpiricalFormula for
# a unimod modification matching code_name.
#
# Opens the sqlite3 database at unimod_path, runs mod_query(code_name) and
# parses the composition column of the single matching row with
# EmpiricalFormula.parse_simple.  sqlite3 is required lazily so that it is
# only needed when a modification is actually looked up.
#
# Raises a RuntimeError if no unimod_path is configured, if no modification
# matches, or if more than one modification matches.
def find_mod(code_name)
  raise "no unimod_path was specified" if unimod_path == nil
  require 'sqlite3' unless Object.const_defined?(:SQLite3)

  results = []
  db = SQLite3::Database.new(unimod_path)
  begin
    db.execute(mod_query(code_name)) do |row|
      results << row
    end
  ensure
    # always release the database handle, even when the query fails
    db.close
  end

  case results.length
  when 1 then EmpiricalFormula.parse_simple(results[0][1])
  when 0 then raise "could not find modification: #{code_name}"
  else raise "multiple modifications found for: #{code_name} (#{results.collect {|result| result[0]}.join(', ')})"
  end
end
|
88
|
+
|
89
|
+
WATER = EmpiricalFormula.parse "H2O"
|
90
|
+
HYDROGEN = EmpiricalFormula.parse "H"
|
91
|
+
HYDROXIDE = EmpiricalFormula.parse "OH"
|
92
|
+
|
93
|
+
# Returns an array of the calculated masses, in the correct unit.
#
# Each input is parsed with EmpiricalFormula.parse.  Sections of the form
# '<mod>:<SEQUENCE>:<mod>' (both mods optional) are turned into a
# Polypeptide plus WATER, plus the formula found by find_mod for each
# non-empty mod name.  The mass is computed per the type config, converted
# from Da to the configured units, optionally rounded to precision, and
# logged alongside either the parsed formula or the original input string.
def process(*formulae)
  formulae.collect do |formula_str|
    formula = EmpiricalFormula.parse(formula_str) do |section|
      match = /^(.*?):([A-Z]+):?(.*)$/.match(section)
      if match
        leading_mod, sequence, trailing_mod = match.captures

        peptide = Libraries::Polypeptide.new(sequence) + WATER
        peptide += find_mod(leading_mod) unless leading_mod.to_s.empty?
        peptide += find_mod(trailing_mod) unless trailing_mod.to_s.empty?
        peptide
      end
    end

    raw_mass = formula.mass do |element|
      if type == :monoisotopic
        element.mass
      elsif type == :average
        element.std_atomic_weight.value
      else
        raise "unknown mass type: #{type}"
      end
    end

    mass = Unit.new(raw_mass, "Da").convert_to(units)
    mass = Unit.new(Utils.round(mass.scalar, precision), units) unless precision == nil

    # report the parsed formula when the composition flag is set,
    # otherwise echo the input as it was given
    log mass, composition ? formula : formula_str

    mass
  end
end
|
125
|
+
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
require 'constants/libraries/element'
|
2
|
+
require 'molecules/utils'
|
3
|
+
require 'strscan'
|
4
|
+
|
5
|
+
module Molecules
|
6
|
+
Element = Constants::Libraries::Element
|
7
|
+
|
8
|
+
# EmpiricalFormula represents the empirical formula (ex 'H(2)O') for
|
9
|
+
# a molecule. The formula is stored as an array of integers aligned
|
10
|
+
# to the elements in EmpiricalFormula::ELEMENT_INDEX. Hence:
|
11
|
+
#
|
12
|
+
# EmpiricalFormula::ELEMENT_INDEX[0].name # => "Hydrogen"
|
13
|
+
# EmpiricalFormula::ELEMENT_INDEX[1].name # => "Oxygen"
|
14
|
+
#
|
15
|
+
# water = EmpiricalFormula.new [2,1]
|
16
|
+
# water.to_s # => 'H(2)O'
|
17
|
+
# water.mass # => 18.0105646863
|
18
|
+
#
|
19
|
+
# EmpiricalFormula may be added, subtracted, and multiplied to
|
20
|
+
# perform the expected operations:
|
21
|
+
#
|
22
|
+
# alanine = EmpiricalFormula.new [5,1,3,1]
|
23
|
+
# (alanine - water).formula # => [3,0,3,1]
|
24
|
+
#
|
25
|
+
class EmpiricalFormula
|
26
|
+
class << self
|
27
|
+
|
28
|
+
# Parses a simple formula (formatted like those returned by
# EmpiricalFormula#to_s) into an EmpiricalFormula.  Whitespace
# is allowed in the formula.
#
#   EmpiricalFormula.parse_simple("H(2)O").to_s          # => "H(2)O"
#   EmpiricalFormula.parse_simple("H (2) O").to_s        # => "H(2)O"
#   EmpiricalFormula.parse_simple("HO(-1)O(2)H").to_s    # => "H(2)O"
#
# If a block is given, the factors array produced by
# composition_to_factors is yielded and the block's result is returned
# instead of a new EmpiricalFormula.
#
# Raises ParseError if the formula contains anything other than element
# symbols and parenthesized (optionally negative) integer factors, or if a
# factor is not preceded by an element.
def parse_simple(chemical_formula)
  formula = chemical_formula.to_s.gsub(/\s+/, "")

  factor = nil
  composition = Hash.new(0)

  # The formula is scanned in reverse so that each factor is read before
  # the element it applies to; matched text is reversed back before use.
  scanner = StringScanner.new(formula.reverse)
  until scanner.eos?
    if scanner.scan(/\)(\d+-?)\(/)
      # found a factor
      factor = scanner[1].reverse.to_i
    elsif scanner.scan(/[a-z]?[A-Z]/)
      # found an element; an element without a factor counts once
      composition[scanner.matched.reverse] += (factor.nil? ? 1 : factor)
      factor = nil
    else
      raise ParseError.new("could not parse formula: #{chemical_formula}")
    end
  end

  # a factor still pending here was never attached to an element
  raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?

  factors = composition_to_factors(composition)
  block_given? ? yield(factors) : EmpiricalFormula.new(factors)
end
|
60
|
+
|
61
|
+
# Parses a generalized chemical formula into an EmpiricalFormula.
# Formula sections can be nested with parenthesis, and multiple
# sections can be added or subtracted within the formula.
#
#   EmpiricalFormula.parse("H2O").to_s                   # => "H(2)O"
#   EmpiricalFormula.parse("CH3(CH2)50CH3").to_s         # => "C(52)H(106)"
#   EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s    # => "C(2)H(4)N(2)"
#
# Note that the format for EmpiricalFormula#to_s differs from the
# format that parse utilizes.
#
# To extend the functionality of parse, provide a block to receive
# formula sections with unexpected punctuation and calculate an
# EmpiricalFormula therefrom.  If the block returns nil,
# then parse will raise an error.
#
#   block = lambda do |formula|
#     case formula
#     when /\[(.*)\]/
#       factors = $1.split(/,/).collect {|i| i.strip.to_i }
#       EmpiricalFormula.new(factors)
#     else nil
#     end
#   end
#
#   EmpiricalFormula.parse("H2O + [2, 1]", &block).to_s  # => "H(4)O(2)"
#   EmpiricalFormula.parse("H2O + :not_expected", &block) # !> ParseError
#
# Raises ParseError for unexpected characters the block does not handle,
# hanging factors, mismatched parenthesis, or a non-empty formula in which
# no elements were found.
def parse(chemical_formula, &block)
  # Remove whitespace
  formula = chemical_formula.to_s.gsub(/\s+/, "")

  # Split and handle multipart formulae.  '+' is split before '-', so
  # 'A - B + C' is evaluated as (A - B) + C.
  case formula
  when /\+/
    return formula.split(/\+/).inject(EmpiricalFormula.new) do |sum, section|
      sum + parse(section, &block)
    end
  when /-/
    sections = formula.split(/-/)
    first = parse(sections.shift, &block)
    return sections.inject(first) do |difference, section|
      difference - parse(section, &block)
    end
  when /[^A-Za-z0-9()]/
    # anything other than letters, digits and parenthesis is handed to
    # the block, if one was given
    result = block_given? ? yield(formula) : nil
    return result unless result == nil

    raise ParseError.new("unexpected characters in formula: #{chemical_formula}")
  end

  # factor is the number following an element, as 6 and 12 in 'C6H12'
  # factor == nil indicates that a number has not been read for the
  # next element.  This state is used later to check for hanging
  # factors, as in '2C6' or (8OH)
  factor = nil

  # multiplier is the latest cumulative factor for a parenthesis
  # expression.  A new multiplier is pushed on the stack for every new
  # parenthesis set, and popped off when the set terminates.
  # ex: for CH3(C(H)2)7CH
  #   At the period       Integer at the top of the stack equals
  #   CH3(C(H)2)7.CH      1
  #   CH3(C(H)2.)7CH      7
  #   CH3(C(H.)2)7CH      14
  #   CH3(C.(H)2)7CH      7
  #   CH3.(C(H)2)7CH      1
  multiplier = [1]

  # composition will store the formula composition as it is parsed
  composition = Hash.new(0)

  # Parse elements and factors out of the formula from right to left
  scanner = StringScanner.new(formula.reverse)
  until scanner.eos?
    if scanner.scan(/\d+/)
      # found a factor
      factor = scanner.matched.reverse.to_i
    elsif scanner.scan(/[a-z]?[A-Z]/)
      # found an element; adjust the factor by the multiplier.  If no
      # factor has been read for the element, as would be seen in NaOH,
      # use 1 instead.  Remember to reverse the symbol.
      composition[scanner.matched.reverse] += (factor.nil? ? 1 : factor) * multiplier.last
      factor = nil
    elsif scanner.scan(/\)/)
      # When a parenthesis ends, the current multiplier must be
      # adjusted by the current factor (or 1 if none was read)
      multiplier << (factor.nil? ? 1 : factor) * multiplier.last
      factor = nil
    elsif scanner.scan(/\(/)
      # When a parenthesis starts, the current multiplier is popped
      # off.  Check for hanging factors and that a multiplier will
      # remain after popping; if none would remain then the
      # parenthesis must be mismatched
      raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
      raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length > 1

      multiplier.pop
    else
      raise ParseError.new("could not parse formula: #{chemical_formula}")
    end
  end

  # Check for hanging factors, that a multiplier remains, and that
  # elements were found during parsing
  raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
  raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length == 1
  raise ParseError.new("no elements could be found in the formula: #{chemical_formula}") if composition.length == 0 && !formula.empty?

  EmpiricalFormula.new(composition_to_factors(composition))
end
|
185
|
+
|
186
|
+
# Parses the input formula into an EmpiricalFormula and
# calculates the mass therefrom.  By default the mass
# will be the monoisotopic mass of the formula.
#
# The block, if given, is forwarded to EmpiricalFormula#mass.
# See EmpiricalFormula#mass for more details.
def mass(formula, &block) # :yields: element
  parse(formula).mass(&block)
end
|
194
|
+
|
195
|
+
protected
|
196
|
+
|
197
|
+
# Converts a hash of (symbol, factor) pairs into a factors array,
# suitable for initializing an EmpiricalFormula.
#
# Keys may be element symbols or Element instances.  Pairs with a zero
# factor are skipped before the element is looked up.  When several keys
# resolve to the same element their factors are summed.  Positions for
# elements that do not occur are left nil.
#
# Raises UnknownElementError if a symbol has no matching Element.
def composition_to_factors(composition)
  factors = []
  composition.each_pair do |symbol, factor|
    next if factor == 0

    element = symbol.kind_of?(Element) ? symbol : Element.index(:symbol)[symbol]
    raise UnknownElementError.new("unknown element: #{symbol}") if element.nil?

    # accumulate rather than assign so that duplicate references to one
    # element are not lost
    position = ELEMENT_INDEX.index(element)
    factors[position] = (factors[position] || 0) + factor
  end
  factors
end
|
213
|
+
end
|
214
|
+
|
215
|
+
class UnknownElementError < StandardError # :nodoc:
|
216
|
+
end
|
217
|
+
|
218
|
+
class ParseError < StandardError # :nodoc:
|
219
|
+
end
|
220
|
+
|
221
|
+
include Enumerable
|
222
|
+
include Utils
|
223
|
+
|
224
|
+
# An array defining the number of a given element in the formula. The
|
225
|
+
# order of elements in ELEMENT_INDEX correspond to order of formula,
|
226
|
+
# such that formula[1] indicates the number of ELEMENT_INDEX[1] elements
|
227
|
+
# in self.
|
228
|
+
attr_reader :formula
|
229
|
+
|
230
|
+
# Initializes a new EmpiricalFormula from an array of factors.
#
# When normalize is true (the default) nil factors are converted to zero
# and trailing zeros are removed.  When normalize is false the factors are
# stored as given.
#
# The stored formula is a frozen copy, so the array passed in is neither
# modified nor frozen, and an already-frozen array (such as the formula of
# another EmpiricalFormula) is accepted.
def initialize(formula=[], normalize=true)
  @formula = if normalize
    # normalize by converting nils to zero and removing trailing zeros
    normalized = formula.collect {|factor| factor.nil? ? 0 : factor }
    normalized.pop while normalized.last == 0
    normalized
  else
    formula.dup
  end

  # ensure the formula cannot be changed
  @formula.freeze
end
|
242
|
+
|
243
|
+
# Returns a new EmpiricalFormula summing the formula of another and self.
|
244
|
+
def +(another)
|
245
|
+
EmpiricalFormula.new(add(self.formula.dup, another.formula), false)
|
246
|
+
end
|
247
|
+
|
248
|
+
# Returns a new EmpiricalFormula subtracting the formula of another from self.
|
249
|
+
def -(another)
|
250
|
+
EmpiricalFormula.new(add(self.formula.dup, another.formula, -1), false)
|
251
|
+
end
|
252
|
+
|
253
|
+
# Returns a new EmpiricalFormula multiplying the formula of self by factor.
|
254
|
+
def *(factor)
|
255
|
+
EmpiricalFormula.new(multiply(self.formula.dup, factor), false)
|
256
|
+
end
|
257
|
+
|
258
|
+
# True if another is an EmpiricalFormula and the formula of another equals the formula of self.
|
259
|
+
def ==(another)
|
260
|
+
another.kind_of?(EmpiricalFormula) && self.formula == another.formula
|
261
|
+
end
|
262
|
+
|
263
|
+
# Yields each element and the number of times that element occurs in self.
# Elements with a count of zero are skipped.
#
# Returns an Enumerator over the same (element, n) pairs when no block is
# given.
def each # :yields: element, n
  return enum_for(:each) unless block_given?

  formula.each_with_index do |n, index|
    next if n == 0
    yield(ELEMENT_INDEX[index], n)
  end
end
|
270
|
+
|
271
|
+
# Returns a formula string formatted like 'H(2)O'.  Each element
# contributes its symbol followed by its count in parenthesis (omitted
# when the count is 1); the resulting pieces are sorted and joined.
def to_s
  parts = collect do |element, n|
    suffix = (n == 1) ? "" : "(#{n})"
    element.symbol + suffix
  end

  parts.sort.join('')
end
|
278
|
+
|
279
|
+
# Calculates and returns the mass of self using the element
# masses returned by the block.  Returns the monoisotopic mass
# for the formula (ie the mass calculated from the most abundant
# natural isotope of each element) if no block is given; that
# value is computed once and cached.
#
#   water = EmpiricalFormula.new [2,1]
#
#   # monoisotopic mass calculation
#   water.mass                                    # => 18.0105646863
#   water.mass {|e| e.mass }                      # => 18.0105646863
#
#   # average mass calculation
#   water.mass {|e| e.std_atomic_weight.value }   # => 18.01528
#
# ==== Notes
# - The definition of monoisotopic mass conforms to
#   that presented in 'Standard Definitions of Terms Relating
#   to Mass Spectrometry, Phil. Price, J. Am. Soc. Mass
#   Spectrom. (1991) 2 336-348'
#   (see {Unimod Mass Help}[http://www.unimod.org/masses.html])
# - Masses are calculated such that mathematical operations
#   are performed on the return of the block.
#
def mass(&block)
  return @monoisotopic_mass ||= mass {|element| element.mass } unless block_given?

  # the block's value stays on the left of each operation, and the
  # running total starts at the Integer 0
  inject(0) do |total, (element, n)|
    (yield(element) * n) + total
  end
end
|
311
|
+
|
312
|
+
# An array of all element symbols ordered roughly by their occurrence
|
313
|
+
# in common biological molecules (ex water, carbohydrates, proteins).
|
314
|
+
ELEMENT_INDEX_ORDER = ['H', 'O', 'C', 'N', 'S', 'P', 'Fe', 'Ni', 'Se']
|
315
|
+
|
316
|
+
# An array of all elements ordered as in ELEMENT_INDEX_ORDER
|
317
|
+
ELEMENT_INDEX = Element.library.collect :element_index do |e|
|
318
|
+
unless ELEMENT_INDEX_ORDER.include?(e.symbol)
|
319
|
+
ELEMENT_INDEX_ORDER << e.symbol
|
320
|
+
end
|
321
|
+
|
322
|
+
[e, ELEMENT_INDEX_ORDER.index(e.symbol)]
|
323
|
+
end
|
324
|
+
end
|
325
|
+
end
|