molecules 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +21 -0
- data/README +73 -0
- data/Rakefile +78 -0
- data/lib/molecules.rb +4 -0
- data/lib/molecules/calc.rb +127 -0
- data/lib/molecules/empirical_formula.rb +325 -0
- data/lib/molecules/libraries/polypeptide.rb +91 -0
- data/lib/molecules/libraries/residue.rb +165 -0
- data/lib/molecules/utils.rb +49 -0
- data/tap.yml +0 -0
- data/test/molecules/calc_test.rb +37 -0
- data/test/molecules/empirical_formula_class_test.rb +196 -0
- data/test/molecules/empirical_formula_test.rb +204 -0
- data/test/molecules/libraries/polypeptide_test.rb +128 -0
- data/test/molecules/libraries/residue_test.rb +289 -0
- data/test/molecules/utils_test.rb +147 -0
- data/test/molecules_test.rb +24 -0
- data/test/molecules_test_helper.rb +31 -0
- data/test/molecules_test_suite.rb +3 -0
- data/test/tap_test_helper.rb +3 -0
- metadata +82 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2006-2008, Regents of the University of Colorado.
|
2
|
+
Developer:: Simon Chiang, Biomolecular Structure Program
|
3
|
+
Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
6
|
+
software and associated documentation files (the "Software"), to deal in the Software
|
7
|
+
without restriction, including without limitation the rights to use, copy, modify, merge,
|
8
|
+
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
9
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be included in all copies or
|
12
|
+
substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
18
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
19
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
20
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
21
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
= Molecules
|
2
|
+
|
3
|
+
A library of molecules for scientific calculations in Ruby.
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Molecules provides libraries of commonly used molecules (currently just amino
|
8
|
+
acid residues and polypeptides). Library classes inherit from EmpiricalFormula
|
9
|
+
which allows calculation of molecular composition and mass, as well as
|
10
|
+
addition/subtraction of other molecules.
|
11
|
+
|
12
|
+
I have attempted to use reputable sources and to adhere to standards when
|
13
|
+
applicable. Please notify me of any errors and send me suggestions!
|
14
|
+
|
15
|
+
* Rubyforge[http://rubyforge.org/projects/bioactive]
|
16
|
+
* Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/13518-molecules/overview]
|
17
|
+
* Github[http://github.com/bahuvrihi/molecules/tree/master]
|
18
|
+
|
19
|
+
== Usage
|
20
|
+
|
21
|
+
require 'molecules'
|
22
|
+
include Molecules::Libraries
|
23
|
+
|
24
|
+
# Residue predefines all common amino acids
|
25
|
+
# as well as some uncommon ones.
|
26
|
+
r = Residue::A
|
27
|
+
r.name # => "Alanine"
|
28
|
+
r.abbr # => "Ala"
|
29
|
+
r.letter # => "A"
|
30
|
+
r.side_chain.to_s # => "CH(3)"
|
31
|
+
r.mass # => 71.03711
|
32
|
+
r.immonium_ion_mass # => 44.0500
|
33
|
+
|
34
|
+
# Polypeptide allows for creation of polypeptides
|
35
|
+
# from residue sequences.
|
36
|
+
p = Polypeptide.new("RPPGFSPFR")
|
37
|
+
p.to_s # => "C(50)H(71)N(15)O(10)"
|
38
|
+
p.mass # => 1041.5508
|
39
|
+
|
40
|
+
# Generic molecules may be specified with EmpiricalFormula.
|
41
|
+
caffeine = Molecules::EmpiricalFormula.parse("C8H10N4O2")
|
42
|
+
coffee = Molecules::EmpiricalFormula.parse("C8H10N4O2 + H2O")
|
43
|
+
|
44
|
+
=== Mass Calculator (tap task)
|
45
|
+
|
46
|
+
Molecules provides a mass calculator tap task. Tap[http://tap.rubyforge.org]
|
47
|
+
is not required by molecules in general, but you get this bonus if you have tap
|
48
|
+
installed:
|
49
|
+
|
50
|
+
% tap -- molecules/calc ":RPPGFSPFR + H2O"
|
51
|
+
I[15:34:30] 1077.57 Da :RPPGFSPFR + H2O
|
52
|
+
|
53
|
+
== Known Issues
|
54
|
+
|
55
|
+
* Polypeptide only allows common residues
|
56
|
+
* No 'Molecule' class is defined, pending the potential addition
|
57
|
+
of more molecule data (ex: SMILES[http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification]
|
58
|
+
data)
|
59
|
+
* No mechanism for defining large libraries of molecules has been
|
60
|
+
chosen. A database solution may be adopted to this end.
|
61
|
+
|
62
|
+
== Installation
|
63
|
+
|
64
|
+
Molecules is available as a gem through RubyForge[http://rubyforge.org/projects/bioactive]. Use:
|
65
|
+
|
66
|
+
% gem install molecules
|
67
|
+
|
68
|
+
== Info
|
69
|
+
|
70
|
+
Copyright (c) 2006-2008, Regents of the University of Colorado.
|
71
|
+
Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
|
72
|
+
Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
|
73
|
+
Licence:: MIT-Style
|
data/Rakefile
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
# tasks
|
8
|
+
desc 'Default: Run tests.'
|
9
|
+
task :default => :test
|
10
|
+
|
11
|
+
desc 'Run tests.'
|
12
|
+
Rake::TestTask.new(:test) do |t|
|
13
|
+
t.libs << 'lib'
|
14
|
+
t.pattern = File.join('test', ENV['subset'] || '', ENV['pattern'] || '**/*_test.rb')
|
15
|
+
t.verbose = true
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# admin tasks
|
20
|
+
#
|
21
|
+
|
22
|
+
# Loads and returns the Gem::Specification described by molecules.gemspec
# (resolved relative to the current working directory).
#
# The gemspec used to be eval'd in a thread under <tt>$SAFE = 3</tt>; that
# safe level is no longer supported by current Ruby versions, so the
# loader shipped with RubyGems is used instead.
#
# Raises a RuntimeError if the gemspec is missing or cannot be evaluated
# into a specification.
def gemspec
  path = "molecules.gemspec"
  spec = Gem::Specification.load(path)
  raise "could not load gemspec: #{path}" unless spec
  spec
end
|
28
|
+
|
29
|
+
Rake::GemPackageTask.new(gemspec) do |pkg|
|
30
|
+
pkg.need_tar = true
|
31
|
+
end
|
32
|
+
|
33
|
+
# Prints one line per project file, labeled to show how it relates to
# the gemspec:
#
#   true  : file listed in the gemspec and present on disk
#   false : file listed in the gemspec but missing
#   (blank): file on disk that the gemspec does not list
task :print_manifest do
  # collect files from the gemspec, labeling
  # with true or false corresponding to the
  # file existing or not
  manifest = gemspec.files.inject({}) do |entries, file|
    # File.exist? replaces the deprecated File.exists? alias
    entries[File.expand_path(file)] = [File.exist?(file), file]
    entries
  end

  # gather non-rdoc/pkg files for the project
  # and add to the manifest if they are not
  # included already (marking by the absence
  # of a label)
  Dir.glob("**/*").each do |file|
    next if file =~ /^(rdoc|pkg)/ || File.directory?(file)

    path = File.expand_path(file)
    manifest[path] = ["", file] unless manifest.has_key?(path)
  end

  # sort by relative path and output the results
  manifest.values.sort_by {|label, file| file }.each do |entry|
    puts "%-5s : %s" % entry
  end
end
|
58
|
+
|
59
|
+
desc 'Generate documentation.'
|
60
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
61
|
+
rdoc.rdoc_dir = 'rdoc'
|
62
|
+
rdoc.title = "molecules"
|
63
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
64
|
+
rdoc.rdoc_files.include(["README", 'MIT-LICENSE'])
|
65
|
+
rdoc.rdoc_files.include(gemspec.files.select {|file| file =~ /^lib/})
|
66
|
+
end
|
67
|
+
|
68
|
+
desc "Publish RDoc to RubyForge"
|
69
|
+
task :publish_rdoc => [:rdoc] do
|
70
|
+
config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
|
71
|
+
host = "#{config["username"]}@rubyforge.org"
|
72
|
+
|
73
|
+
rsync_args = "-v -c -r"
|
74
|
+
remote_dir = "/var/www/gforge-projects/bioactive/molecules"
|
75
|
+
local_dir = "rdoc"
|
76
|
+
|
77
|
+
sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
78
|
+
end
|
data/lib/molecules.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'molecules/empirical_formula'
|
2
|
+
require 'molecules/libraries/polypeptide'
|
3
|
+
|
4
|
+
# patch for ruby units
|
5
|
+
class Unit < Numeric # :nodoc:
|
6
|
+
UNIT_DEFINITIONS['<AMU>'] = [%w{u AMU amu}, 1/6.0221415e26, :mass, %w{<kilogram>}]
|
7
|
+
UNIT_DEFINITIONS['<dalton>'] = [%w{Da Dalton Daltons dalton daltons}, 1/6.0221415e26, :mass, %w{<kilogram>}]
|
8
|
+
end
|
9
|
+
Unit.setup
|
10
|
+
|
11
|
+
module Molecules
|
12
|
+
|
13
|
+
# :startdoc::manifest a mass calculator
|
14
|
+
# Calculates the mass of a molecule or empirical formula. The
|
15
|
+
# options can be used to alter the output (precision, mass
|
16
|
+
# calculation method etc.) You may enter compound formulae, or
|
17
|
+
# a list of formulae. In addition, polypeptides can be specified
|
18
|
+
# using the one-letter residue codes:
|
19
|
+
#
|
20
|
+
# % tap -- molecules/calc H2O
|
21
|
+
# I[17:09:00] 18.0105646863 Da H2O
|
22
|
+
#
|
23
|
+
# % tap -- molecules/calc H2O -u kg
|
24
|
+
# I[13:35:59] 2.99072e-026 kg H2O
|
25
|
+
#
|
26
|
+
# % tap -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
|
27
|
+
# I[17:08:21] 89.05 Da C3H5NO + H2O
|
28
|
+
# I[17:08:21] 1059.56 Da C50H73N15O11
|
29
|
+
#
|
30
|
+
# % tap -- molecules/calc :RPPGFSPFR
|
31
|
+
# I[13:35:02] 1059.56 Da :RPPGFSPFR
|
32
|
+
#
|
33
|
+
# Furthermore, if a unimod path is specified in the configurations,
|
34
|
+
# unimod modifications may be specified by name as the polypeptide
|
35
|
+
# termini. Use '%' signs as in a SQL query to shorten the name:
|
36
|
+
#
|
37
|
+
# % tap -- molecules/calc 'Acetyl:RPPGFSPFR:Hydroxyl%' --unimod-path <...>
|
38
|
+
# I[13:33:25] 1059.56 Da Acetyl:RPPGFSPFR:Hydroxyl%
|
39
|
+
#
|
40
|
+
# The unimod path must point to an sqlite3 ActiveUnimod database, and
|
41
|
+
# sqlite3-ruby must be installed for this feature to work.
|
42
|
+
#
|
43
|
+
# * ActiveUnimod[http://bioactive.rubyforge.org/]
|
44
|
+
# * sqlite3-ruby[http://rubyforge.org/projects/sqlite-ruby/]
|
45
|
+
#
|
46
|
+
class Calc < Tap::Task
|
47
|
+
|
48
|
+
config :type, :monoisotopic # the mass type calculated
|
49
|
+
config :precision, nil, :short => 'p' # the precision of the mass
|
50
|
+
config :units, "Da", :short => 'u', &c.string # the mass unit reported
|
51
|
+
config :composition, false, :short => 'c', &c.flag # reports the composition, not the formula
|
52
|
+
config :unimod_path, nil do |path| # the path to the unimod database
  # validation block: nil means "no database configured"; any other
  # value must name an existing file or the configuration is rejected
  case
  when path == nil then nil
  when File.exist?(path) then path  # File.exist? replaces the deprecated File.exists? alias
  else raise "path to unimod db does not exist: #{path}"
  end
end
|
59
|
+
|
60
|
+
# Formulates a query for a modification matching code_name
# for the unimod database.  If the code_name contains a '%'
# then the query will use a LIKE syntax, otherwise the
# code_name will be searched for exactly.
#
# Single quotes in code_name are doubled so that the name cannot
# terminate the SQL string literal it is embedded in.
def mod_query(code_name)
  operator = code_name.include?('%') ? 'LIKE' : '='
  escaped_name = code_name.gsub("'", "''")
  "SELECT code_name, composition FROM modifications WHERE code_name #{operator} '#{escaped_name}'"
end
|
68
|
+
|
69
|
+
# Attempts to find and instantiate an EmpiricalFormula for
# a unimod modification matching code_name.
#
# The database at unimod_path is queried with mod_query(code_name) and
# the composition column of the single matching row is parsed with
# EmpiricalFormula.parse_simple.  Raises an error if no unimod_path was
# specified, if nothing matches, or if more than one modification matches.
def find_mod(code_name)
  raise "no unimod_path was specified" if unimod_path == nil
  # sqlite3 is only needed for this feature, so it is loaded on demand
  require 'sqlite3' unless Object.const_defined?(:SQLite3)

  results = []
  db = SQLite3::Database.new(unimod_path)
  begin
    db.execute(mod_query(code_name)) do |row|
      results << row
    end
  ensure
    # release the database handle even if the query raises
    db.close
  end

  case results.length
  when 1 then EmpiricalFormula.parse_simple(results[0][1])
  when 0 then raise "could not find modification: #{code_name}"
  else raise "multiple modifications found for: #{code_name} (#{results.collect {|result| result[0]}.join(', ')})"
  end
end
|
88
|
+
|
89
|
+
WATER = EmpiricalFormula.parse "H2O"
|
90
|
+
HYDROGEN = EmpiricalFormula.parse "H"
|
91
|
+
HYDROXIDE = EmpiricalFormula.parse "OH"
|
92
|
+
|
93
|
+
# Calculates the mass of each input formula and returns the masses as an
# array, expressed in the configured units.  Each mass is logged together
# with either the parsed formula (when composition is set) or the input
# string.
def process(*formulae)
  formulae.collect do |formula_str|
    formula = EmpiricalFormula.parse(formula_str) do |section|
      # sections with unexpected punctuation are treated as polypeptides
      # written as '<n-term mod>:<SEQUENCE>:<c-term mod>'; anything else
      # is left unhandled (nil)
      if section =~ /^(.*?):([A-Z]+):?(.*)$/
        n_term, sequence, c_term = $1, $2, $3

        peptide = Libraries::Polypeptide.new(sequence) + WATER
        peptide += find_mod(n_term) unless n_term.to_s.empty?
        peptide += find_mod(c_term) unless c_term.to_s.empty?
        peptide
      end
    end

    # pick the per-element mass according to the configured type
    mass = formula.mass do |element|
      case type
      when :monoisotopic then element.mass
      when :average then element.std_atomic_weight.value
      else raise "unknown mass type: #{type}"
      end
    end

    # masses are calculated in Daltons, then converted and optionally rounded
    mass = Unit.new(mass, "Da").convert_to(units)
    mass = Unit.new( Utils.round(mass.scalar, precision), units) unless precision == nil

    log mass, composition ? formula : formula_str

    mass
  end
end
|
125
|
+
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
require 'constants/libraries/element'
|
2
|
+
require 'molecules/utils'
|
3
|
+
require 'strscan'
|
4
|
+
|
5
|
+
module Molecules
|
6
|
+
Element = Constants::Libraries::Element
|
7
|
+
|
8
|
+
# EmpiricalFormula represents the empirical formula (ex 'H(2)O') for
|
9
|
+
# a molecule. The formula is stored as an array of integers aligned
|
10
|
+
# to the elements in EmpiricalFormula::ELEMENT_INDEX. Hence:
|
11
|
+
#
|
12
|
+
# EmpiricalFormula::ELEMENT_INDEX[0].name # => "Hydrogen"
|
13
|
+
# EmpiricalFormula::ELEMENT_INDEX[1].name # => "Oxygen"
|
14
|
+
#
|
15
|
+
# water = EmpiricalFormula.new [2,1]
|
16
|
+
# water.to_s # => 'H(2)O'
|
17
|
+
# water.mass # => 18.0105646863
|
18
|
+
#
|
19
|
+
# EmpiricalFormula may be added, subtracted, and multiplied to
|
20
|
+
# perform the expected operations:
|
21
|
+
#
|
22
|
+
# alanine = EmpiricalFormula.new [5,1,3,1]
|
23
|
+
# (alanine - water).formula # => [3,0,3,1]
|
24
|
+
#
|
25
|
+
class EmpiricalFormula
|
26
|
+
class << self
|
27
|
+
|
28
|
+
# Parses a simple formula (formatted like those returned by
# EmpiricalFormula#to_s) into an EmpiricalFormula.  Whitespace
# is allowed in the formula.
#
#   EmpiricalFormula.parse_simple("H(2)O").to_s          # => "H(2)O"
#   EmpiricalFormula.parse_simple("H (2) O").to_s        # => "H(2)O"
#   EmpiricalFormula.parse_simple("HO(-1)O(2)H").to_s    # => "H(2)O"
#
# If a block is given, the factors array is yielded to the block and the
# return of the block is returned instead of a new EmpiricalFormula.
# Raises a ParseError if the formula cannot be tokenized.
def parse_simple(chemical_formula)
  formula = chemical_formula.to_s.gsub(/\s+/, "")

  factor = nil
  composition = Hash.new(0)

  # the formula is scanned in reverse so that a factor is read before
  # the element it applies to
  scanner = StringScanner.new(formula.reverse)
  until scanner.eos?
    case
    when scanner.scan_full(/\)(\d+-?)\(/, true, false)
      # found a factor (digits and sign are reversed back before conversion)
      factor = scanner[1].reverse.to_i
    when scanner.scan_full(/([a-z]?[A-Z])/, true, false)
      # found an element; an element without a factor counts once
      composition[scanner[1].reverse] += (factor == nil ? 1 : factor)

      # reset factor to nil
      factor = nil
    else
      raise ParseError.new("could not parse formula: #{chemical_formula}")
    end
  end

  factors = composition_to_factors(composition)
  block_given? ? yield(factors) : EmpiricalFormula.new(factors)
end
|
60
|
+
|
61
|
+
# Parses a generalized chemical formula into an EmpiricalFormula.
# Formula sections can be nested with parenthesis, and multiple
# sections can be added or subtracted within the formula.
#
#   EmpiricalFormula.parse("H2O").to_s                   # => "H(2)O"
#   EmpiricalFormula.parse("CH3(CH2)50CH3").to_s         # => "C(52)H(106)"
#   EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s    # => "C(2)H(4)N(2)"
#
# Note that the format for EmpiricalFormula#to_s differs from the
# format that parse utilizes.
#
# To extend the functionality of parse, provide a block to receive
# formula sections with unexpected punctuation and calculate an
# EmpiricalFormula therefrom.  If the block returns nil,
# then parse will raise an error.
#
#   block = lambda do |formula|
#     case formula
#     when /\[(.*)\]/
#       factors = $1.split(/,/).collect {|i| i.strip.to_i }
#       EmpiricalFormula.new(factors)
#     else nil
#     end
#   end
#
#   EmpiricalFormula.parse("H2O + [2, 1]", &block).to_s    # => "H(4)O(2)"
#   EmpiricalFormula.parse("H2O + :not_expected", &block)  # !> ParseError
#
def parse(chemical_formula, &block)
  # Remove whitespace
  formula = chemical_formula.to_s.gsub(/\s+/, "")

  # Split and handle multipart formulae.  Additions are split first, so
  # each '+' section is parsed (including its subtractions) on its own.
  case formula
  when /\+/
    return formula.split(/\+/).inject(EmpiricalFormula.new) {|current, section| current + parse(section, &block) }
  when /-/
    sections = formula.split(/-/)
    first = parse(sections.shift, &block)
    return sections.inject(first) {|current, section| current - parse(section, &block) }
  when /[^A-Za-z0-9()]/
    # anything other than letters, digits and parentheses is handed to
    # the block (a backslash is not a legal formula character either)
    result = block_given? ? yield(formula) : nil
    return result unless result == nil

    raise ParseError.new("unexpected characters in formula: #{chemical_formula}")
  end

  # factor is the number following an element, as 6 and 12 in 'C6H12'
  # factor == nil indicates that a number has not been read for the
  # next element.  This state is used later to check for hanging
  # factors, as in '2C6' or (8OH)
  factor = nil

  # multiplier is the latest cumulative factor for a parenthesis
  # expression.  A new multiplier is pushed on the stack for every new
  # parenthesis set, and popped off when the set terminates.
  # ex: for CH3(C(H)2)7CH
  #   At the period          Integer at the top of the stack equals
  #   CH3(C(H)2)7.CH         1
  #   CH3(C(H)2.)7CH         7
  #   CH3(C(H.)2)7CH         14
  #   CH3(C.(H)2)7CH         7
  #   CH3.(C(H)2)7CH         1
  multiplier = []
  multiplier << 1

  # composition will store the formula composition as it is parsed
  composition = Hash.new(0)

  # Parse elements and factors out of the formula from right to left
  scanner = StringScanner.new(formula.reverse)
  until scanner.eos?

    case
    when scanner.scan_full(/(\d+)/, true, false)
      # found a factor
      factor = scanner[1].reverse.to_i
    when scanner.scan_full(/([a-z]?[A-Z])/, true, false)
      # found an element

      # Adjust the factor by the multiplier.  If factor == nil
      # then a factor has not been read for the element, as would
      # be seen in NaOH; use 1 in this case instead.
      count = (factor.nil? ? 1 : factor) * multiplier.last

      # Add the count to composition, remembering to reverse the symbol
      composition[ scanner[1].reverse ] += count

      # reset factor to nil
      factor = nil
    when scanner.scan_full(/\)/, true, false)
      # When a parenthesis ends, the current multiplier must be
      # adjusted by the current factor.  If factor == nil then a
      # factor has not been read for the parenthesis, use 1 instead
      multiplier << (factor.nil? ? 1 : factor) * multiplier.last

      # reset factor to nil
      factor = nil
    when scanner.scan_full(/\(/, true, false)
      # When a parenthesis starts, the current multiplier is
      # popped off.  Check for hanging factors and that after
      # popping a multiplier will remain.  If no multiplier will
      # remain, then the parenthesis must be mismatched
      raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
      raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length > 1

      multiplier.pop
    else
      raise ParseError.new("could not parse formula: #{chemical_formula}")
    end
  end

  # Check for hanging factors, that a multiplier remains, and that
  # elements were found during parsing
  raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
  raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length == 1
  raise ParseError.new("no elements could be found in the formula: #{chemical_formula}") if composition.length == 0 && !formula.empty?

  EmpiricalFormula.new(composition_to_factors(composition))
end
|
185
|
+
|
186
|
+
# Parses the input formula into an EmpiricalFormula and
# calculates the mass therefrom.  By default the mass
# will be the monoisotopic mass of the formula.
#
# See EmpiricalFormula#mass for more details.
def mass(formula, &block) # :yields: element
  parse(formula).mass(&block)
end
|
194
|
+
|
195
|
+
protected
|
196
|
+
|
197
|
+
# Builds the factors array used to initialize an EmpiricalFormula from a
# hash of (symbol, factor) pairs.  Keys may be element symbols or Element
# instances; each factor is stored at the position of its element in
# ELEMENT_INDEX.  Pairs with a zero factor are skipped, and an
# UnknownElementError is raised for a symbol that has no element.
def composition_to_factors(composition)
  composition.inject([]) do |factors, (symbol, factor)|
    unless factor == 0
      element = symbol.kind_of?(Element) ? symbol : Element.index(:symbol)[symbol]
      raise UnknownElementError.new("unknown element: #{symbol}") if element == nil

      factors[ELEMENT_INDEX.index(element)] = factor
    end

    factors
  end
end
|
213
|
+
end
|
214
|
+
|
215
|
+
class UnknownElementError < StandardError # :nodoc:
|
216
|
+
end
|
217
|
+
|
218
|
+
class ParseError < StandardError # :nodoc:
|
219
|
+
end
|
220
|
+
|
221
|
+
include Enumerable
|
222
|
+
include Utils
|
223
|
+
|
224
|
+
# An array defining the number of a given element in the formula. The
|
225
|
+
# order of elements in ELEMENT_INDEX corresponds to the order of formula,
|
226
|
+
# such that formula[1] indicates the number of ELEMENT_INDEX[1] elements
|
227
|
+
# in self.
|
228
|
+
attr_reader :formula
|
229
|
+
|
230
|
+
def initialize(formula=[], normalize=true)
|
231
|
+
@formula = formula
|
232
|
+
|
233
|
+
if normalize
|
234
|
+
# normalize by converting nils to zero and remove trailing zeros
|
235
|
+
@formula.collect! {|factor| factor == nil ? 0 : factor}
|
236
|
+
@formula.pop while @formula.last == 0
|
237
|
+
end
|
238
|
+
|
239
|
+
# ensure the formula cannot be changed
|
240
|
+
@formula.freeze
|
241
|
+
end
|
242
|
+
|
243
|
+
# Returns a new EmpiricalFormula summing the formula of another and self.
|
244
|
+
def +(another)
|
245
|
+
EmpiricalFormula.new(add(self.formula.dup, another.formula), false)
|
246
|
+
end
|
247
|
+
|
248
|
+
# Returns a new EmpiricalFormula subtracting the formula of another from self.
|
249
|
+
def -(another)
|
250
|
+
EmpiricalFormula.new(add(self.formula.dup, another.formula, -1), false)
|
251
|
+
end
|
252
|
+
|
253
|
+
# Returns a new EmpiricalFormula multiplying the formula of self by factor.
|
254
|
+
def *(factor)
|
255
|
+
EmpiricalFormula.new(multiply(self.formula.dup, factor), false)
|
256
|
+
end
|
257
|
+
|
258
|
+
# True if another is an EmpiricalFormula and the formula of another equals the formula of self.
|
259
|
+
def ==(another)
|
260
|
+
another.kind_of?(EmpiricalFormula) && self.formula == another.formula
|
261
|
+
end
|
262
|
+
|
263
|
+
# Yields each element and the number of times that element occurs in self.
# Elements with a zero count are skipped.  Returns an enumerator over the
# same (element, n) pairs when no block is given.
def each # :yields: element, n
  return enum_for(:each) unless block_given?

  formula.each_with_index do |n, index|
    next if n == 0
    yield(ELEMENT_INDEX[index], n)
  end
end
|
270
|
+
|
271
|
+
# Returns a formula string formatted like 'H(2)O' with the
|
272
|
+
# elements sorted alphabetically by symbol.
|
273
|
+
def to_s
|
274
|
+
collect do |element, n|
|
275
|
+
element.symbol + (n == 1 ? "" : "(#{n})")
|
276
|
+
end.sort.join('')
|
277
|
+
end
|
278
|
+
|
279
|
+
# Calculates and returns the mass of self using the element
|
280
|
+
# masses returned by the block. Returns the monoisotopic mass
|
281
|
+
# for the formula (ie the mass calculated from the most abundant
|
282
|
+
# natural isotope of each element) if no block is given.
|
283
|
+
#
|
284
|
+
# water = EmpiricalFormula.new [2,1]
|
285
|
+
#
|
286
|
+
# # monoisotopic mass calculation
|
287
|
+
# water.mass # => 18.0105646863
|
288
|
+
# water.mass {|e| e.mass } # => 18.0105646863
|
289
|
+
#
|
290
|
+
# # average mass calculation
|
291
|
+
# water.mass {|e| e.std_atomic_weight.value } # => 18.01528
|
292
|
+
#
|
293
|
+
# ==== Notes
|
294
|
+
# - The definition of monoisotopic mass conforms to
|
295
|
+
# that presented in 'Standard Definitions of Terms Relating
|
296
|
+
# to Mass Spectrometry, Phil. Price, J. Am. Soc. Mass
|
297
|
+
# Spectrom. (1991) 2 336-348'
|
298
|
+
# (see {Unimod Mass Help}[http://www.unimod.org/masses.html])
|
299
|
+
# - Masses are calculated such that mathematical operations
|
300
|
+
# are performed on the return of the block.
|
301
|
+
#
|
302
|
+
def mass(&block)
  # without a block the monoisotopic mass is calculated once (using
  # each element's mass) and cached
  unless block_given?
    return @monoisotopic_mass ||= mass {|element| element.mass }
  end

  # the block's return is kept on the left of each operation so that
  # the arithmetic is performed by the object returned from the block
  total = 0
  each {|element, count| total = (yield(element) * count) + total }
  total
end
|
311
|
+
|
312
|
+
# An array of all element symbols ordered roughly by their occurrence
|
313
|
+
# in common biological molecules (ex water, carbohydrates, proteins).
|
314
|
+
ELEMENT_INDEX_ORDER = ['H', 'O', 'C', 'N', 'S', 'P', 'Fe', 'Ni', 'Se']
|
315
|
+
|
316
|
+
# An array of all elements ordered as in ELEMENT_INDEX_ORDER
|
317
|
+
ELEMENT_INDEX = Element.library.collect :element_index do |e|
|
318
|
+
unless ELEMENT_INDEX_ORDER.include?(e.symbol)
|
319
|
+
ELEMENT_INDEX_ORDER << e.symbol
|
320
|
+
end
|
321
|
+
|
322
|
+
[e, ELEMENT_INDEX_ORDER.index(e.symbol)]
|
323
|
+
end
|
324
|
+
end
|
325
|
+
end
|