molecules 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
2
+ Developer:: Simon Chiang, Biomolecular Structure Program
3
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
+ software and associated documentation files (the "Software"), to deal in the Software
7
+ without restriction, including without limitation the rights to use, copy, modify, merge,
8
+ publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
9
+ to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or
12
+ substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,73 @@
1
+ = Molecules
2
+
3
+ A library of molecules for scientific calculations in Ruby.
4
+
5
+ == Description
6
+
7
+ Molecules provides libraries of commonly used molecules (currently just amino
8
+ acid residues and polypeptides). Library classes inherit from EmpiricalFormula
9
+ which allows calculation of molecular composition and mass, as well as
10
+ addition/subtraction of other molecules.
11
+
12
+ I have attempted to use reputable sources and to adhere to standards when
13
+ applicable. Please notify me of any errors and send me suggestions!
14
+
15
+ * Rubyforge[http://rubyforge.org/projects/bioactive]
16
+ * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/13518-molecules/overview]
17
+ * Github[http://github.com/bahuvrihi/molecules/tree/master]
18
+
19
+ == Usage
20
+
21
+ require 'molecules'
22
+ include Molecules::Libraries
23
+
24
+ # Residue predefines all common amino acids
25
+ # as well as some uncommon ones.
26
+ r = Residue::A
27
+ r.name # => "Alanine"
28
+ r.abbr # => "Ala"
29
+ r.letter # => "A"
30
+ r.side_chain.to_s # => "CH(3)"
31
+ r.mass # => 71.03711
32
+ r.immonium_ion_mass # => 44.0500
33
+
34
+ # Polypeptide allows for creation of polypeptides
35
+ # from residue sequences.
36
+ p = Polypeptide.new("RPPGFSPFR")
37
+ p.to_s # => "C(50)H(71)N(15)O(10)"
38
+ p.mass # => 1041.5508
39
+
40
+ # Generic molecules may be specified with EmpiricalFormula.
41
+ caffeine = Molecules::EmpiricalFormula.parse("C8H10N4O2")
42
+ coffee = Molecules::EmpiricalFormula.parse("C8H10N4O2 + H2O")
43
+
44
+ === Mass Calculator (tap task)
45
+
46
+ Molecules provides a mass calculator tap task. Tap[http://tap.rubyforge.org]
47
+ is not required by molecules in general, but you get this bonus if you have tap
48
+ installed:
49
+
50
+ % tap -- molecules/calc ":RPPGFSPFR + H2O"
51
+ I[15:34:30] 1077.57 Da :RPPGFSPFR + H2O
52
+
53
+ == Known Issues
54
+
55
+ * Polypeptide only allows common residues
56
+ * No 'Molecule' class is defined, pending the potential addition
57
+ of more molecule data (ex: SMILES[http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification]
58
+ data)
59
+ * No mechanism for defining large libraries of molecules has been
60
+ chosen. A database solution may be adopted to this end.
61
+
62
+ == Installation
63
+
64
+ Molecules is available as a gem through RubyForge[http://rubyforge.org/projects/bioactive]. Use:
65
+
66
+ % gem install molecules
67
+
68
+ == Info
69
+
70
+ Copyright (c) 2006-2008, Regents of the University of Colorado.
71
+ Developer:: {Simon Chiang}[http://bahuvrihi.wordpress.com], {Biomolecular Structure Program}[http://biomol.uchsc.edu/], {Hansen Lab}[http://hsc-proteomics.uchsc.edu/hansenlab/]
72
+ Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
73
+ Licence:: MIT-Style
@@ -0,0 +1,78 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/rdoctask'
4
+ require 'rake/gempackagetask'
5
+ require 'yaml'
6
+
7
+ # tasks
8
+ desc 'Default: Run tests.'
9
+ task :default => :test
10
+
11
+ desc 'Run tests.'
12
+ Rake::TestTask.new(:test) do |t|
13
+ t.libs << 'lib'
14
+ t.pattern = File.join('test', ENV['subset'] || '', ENV['pattern'] || '**/*_test.rb')
15
+ t.verbose = true
16
+ end
17
+
18
+ #
19
+ # admin tasks
20
+ #
21
+
22
+ def gemspec
23
+ data = File.read("molecules.gemspec")
24
+ spec = nil
25
+ Thread.new { spec = eval("$SAFE = 3\n#{data}") }.join
26
+ spec
27
+ end
28
+
29
+ Rake::GemPackageTask.new(gemspec) do |pkg|
30
+ pkg.need_tar = true
31
+ end
32
+
33
+ task :print_manifest do
34
+ # collect files from the gemspec, labeling
35
+ # with true or false corresponding to the
36
+ # file existing or not
37
+ files = gemspec.files.inject({}) do |files, file|
38
+ files[File.expand_path(file)] = [File.exists?(file), file]
39
+ files
40
+ end
41
+
42
+ # gather non-rdoc/pkg files for the project
43
+ # and add to the files list if they are not
44
+ # included already (marking by the absence
45
+ # of a label)
46
+ Dir.glob("**/*").each do |file|
47
+ next if file =~ /^(rdoc|pkg)/ || File.directory?(file)
48
+
49
+ path = File.expand_path(file)
50
+ files[path] = ["", file] unless files.has_key?(path)
51
+ end
52
+
53
+ # sort and output the results
54
+ files.values.sort_by {|exists, file| file }.each do |entry|
55
+ puts "%-5s : %s" % entry
56
+ end
57
+ end
58
+
59
+ desc 'Generate documentation.'
60
+ Rake::RDocTask.new(:rdoc) do |rdoc|
61
+ rdoc.rdoc_dir = 'rdoc'
62
+ rdoc.title = "molecules"
63
+ rdoc.options << '--line-numbers' << '--inline-source'
64
+ rdoc.rdoc_files.include(["README", 'MIT-LICENSE'])
65
+ rdoc.rdoc_files.include(gemspec.files.select {|file| file =~ /^lib/})
66
+ end
67
+
68
+ desc "Publish RDoc to RubyForge"
69
+ task :publish_rdoc => [:rdoc] do
70
+ config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
71
+ host = "#{config["username"]}@rubyforge.org"
72
+
73
+ rsync_args = "-v -c -r"
74
+ remote_dir = "/var/www/gforge-projects/bioactive/molecules"
75
+ local_dir = "rdoc"
76
+
77
+ sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
78
+ end
@@ -0,0 +1,4 @@
1
+ $: << File.dirname(__FILE__)
2
+
3
+ require 'molecules/libraries/residue'
4
+ require 'molecules/libraries/polypeptide'
@@ -0,0 +1,127 @@
1
+ require 'molecules/empirical_formula'
2
+ require 'molecules/libraries/polypeptide'
3
+
4
+ # patch for ruby units
5
+ class Unit < Numeric # :nodoc:
6
+ UNIT_DEFINITIONS['<AMU>'] = [%w{u AMU amu}, 1/6.0221415e26, :mass, %w{<kilogram>}]
7
+ UNIT_DEFINITIONS['<dalton>'] = [%w{Da Dalton Daltons dalton daltons}, 1/6.0221415e26, :mass, %w{<kilogram>}]
8
+ end
9
+ Unit.setup
10
+
11
+ module Molecules
12
+
13
+ # :startdoc::manifest a mass calculator
14
+ # Calculates the mass of a molecule or empirical formula. The
15
+ # options can be used to alter the output (precision, mass
16
+ # calculation method etc.) You may enter compound formulae, or
17
+ # a list of formulae. In addition, polypeptides can be specified
18
+ # using the one-letter residue codes:
19
+ #
20
+ # % tap -- molecules/calc H2O
21
+ # I[17:09:00] 18.0105646863 Da H2O
22
+ #
23
+ # % tap -- molecules/calc H2O -u kg
24
+ # I[13:35:59] 2.99072e-026 kg H2O
25
+ #
26
+ # % tap -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
27
+ # I[17:08:21] 89.05 Da C3H5NO + H2O
28
+ # I[17:08:21] 1059.56 Da C50H73N15O11
29
+ #
30
+ # % tap -- molecules/calc :RPPGFSPFR
31
+ # I[13:35:02] 1059.56 Da :RPPGFSPFR
32
+ #
33
+ # Furthermore, if a unimod path is specified in the configurations,
34
+ # unimod modifications may be specified by name as the polypeptide
35
+ # termini. Use '%' signs as in a SQL query to shorten the name:
36
+ #
37
+ # % tap -- molecules/calc 'Acetyl:RPPGFSPFR:Hydroxyl%' --unimod-path <...>
38
+ # I[13:33:25] 1059.56 Da Acetyl:RPPGFSPFR:Hydroxyl%
39
+ #
40
+ # The unimod path must point to an sqlite3 ActiveUnimod database, and
41
+ # sqlite3-ruby must be installed for this feature to work.
42
+ #
43
+ # * ActiveUnimod[http://bioactive.rubyforge.org/]
44
+ # * sqlite3-ruby[http://rubyforge.org/projects/sqlite-ruby/]
45
+ #
46
+ class Calc < Tap::Task
47
+
48
+ config :type, :monoisotopic # the mass type calculated
49
+ config :precision, nil, :short => 'p' # the precision of the mass
50
+ config :units, "Da", :short => 'u', &c.string # the mass unit reported
51
+ config :composition, false, :short => 'c', &c.flag # reports the composition, not the formula
52
+ config :unimod_path, nil do |path| # the path to the unimod database
53
+ case
54
+ when path == nil then nil
55
+ when File.exists?(path) then path
56
+ else raise "path to unimod db does not exist: #{path}"
57
+ end
58
+ end
59
+
60
+ # Formulates a query for a modification matching code_name
61
+ # for the unimod database. If the code_name contains a '%'
62
+ # then the query will use a LIKE syntax, otherwise the
63
+ # code_name will be searched for exactly.
64
+ def mod_query(code_name)
65
+ # should do a rails-like escape on code_name
66
+ "SELECT code_name, composition FROM modifications WHERE code_name #{code_name.include?('%') ? 'LIKE' : '='} '#{code_name}'"
67
+ end
68
+
69
+ # Attempts to find and instantiate an EmpiricalFormula for
70
+ # a unimod modification matching code_name.
71
+ def find_mod(code_name)
72
+ raise "no unimod_path was specified" if unimod_path == nil
73
+ require 'sqlite3' unless Object.const_defined?(:SQLite3)
74
+
75
+ results = []
76
+ db = SQLite3::Database.new(unimod_path)
77
+ db.execute(mod_query(code_name)) do |row|
78
+ results << row
79
+ end
80
+ db.close
81
+
82
+ case results.length
83
+ when 1 then EmpiricalFormula.parse_simple(results[0][1])
84
+ when 0 then raise "could not find modification: #{code_name}"
85
+ else raise "multiple modifications found for: #{code_name} (#{results.collect {|result| result[0]}.join(', ')})"
86
+ end
87
+ end
88
+
89
+ WATER = EmpiricalFormula.parse "H2O"
90
+ HYDROGEN = EmpiricalFormula.parse "H"
91
+ HYDROXIDE = EmpiricalFormula.parse "OH"
92
+
93
+ # Returns an array of the calculated masses, in the correct unit.
94
+ def process(*formulae)
95
+ formulae.collect do |formula_str|
96
+ formula = EmpiricalFormula.parse(formula_str) do |str|
97
+ case str
98
+ when /^(.*?):([A-Z]+):?(.*)$/
99
+ peptide = Libraries::Polypeptide.new($2) + WATER
100
+ peptide += find_mod($1) unless $1.to_s.empty?
101
+ peptide += find_mod($3) unless $3.to_s.empty?
102
+ peptide
103
+ else nil
104
+ end
105
+ end
106
+
107
+ mass = formula.mass do |element|
108
+ case type
109
+ when :monoisotopic then element.mass
110
+ when :average then element.std_atomic_weight.value
111
+ else raise "unknown mass type: #{type}"
112
+ end
113
+ end
114
+
115
+ mass = Unit.new(mass, "Da").convert_to(units)
116
+ unless precision == nil
117
+ mass = Unit.new( Utils.round(mass.scalar, precision), units)
118
+ end
119
+
120
+ log mass, composition ? formula : formula_str
121
+
122
+ mass
123
+ end
124
+ end
125
+
126
+ end
127
+ end
@@ -0,0 +1,325 @@
1
+ require 'constants/libraries/element'
2
+ require 'molecules/utils'
3
+ require 'strscan'
4
+
5
+ module Molecules
6
+ Element = Constants::Libraries::Element
7
+
8
+ # EmpiricalFormula represents the empirical formula (ex 'H(2)O') for
9
+ # a molecule. The formula is stored as an array of integers aligned
10
+ # to the elements in EmpiricalFormula::ELEMENT_INDEX. Hence:
11
+ #
12
+ # EmpiricalFormula::ELEMENT_INDEX[0].name # => "Hydrogen"
13
+ # EmpiricalFormula::ELEMENT_INDEX[1].name # => "Oxygen"
14
+ #
15
+ # water = EmpiricalFormula.new [2,1]
16
+ # water.to_s # => 'H(2)O'
17
+ # water.mass # => 18.0105646863
18
+ #
19
+ # EmpiricalFormula may be added, subtracted, and multiplied to
20
+ # perform the expected operations:
21
+ #
22
+ # alanine = EmpiricalFormula.new [5,1,3,1]
23
+ # (alanine - water).formula # => [3,0,3,1]
24
+ #
25
+ class EmpiricalFormula
26
+ class << self
27
+
28
+ # Parses a simple formula (formatted like those returned by
29
+ # EmpiricalFormula#to_s) into a EmpiricalFormula. Whitespace
30
+ # is allowed in the formula.
31
+ #
32
+ # EmpiricalFormula.parse_simple("H(2)O").to_s # => "H(2)O"
32
+ # EmpiricalFormula.parse_simple("H (2) O").to_s # => "H(2)O"
33
+ # EmpiricalFormula.parse_simple("HO(-1)O(2)H").to_s # => "H(2)O"
35
+ #
36
+ def parse_simple(chemical_formula)
37
+ formula = chemical_formula.to_s.gsub(/\s+/, "")
38
+
39
+ factor = nil
40
+ composition = Hash.new(0)
41
+ scanner = StringScanner.new(formula.reverse)
42
+ while scanner.restsize > 0
43
+ case
44
+ when scanner.scan_full(/\)(\d+-?)\(/, true, false)
45
+ # found a factor
46
+ factor = scanner[1].reverse.to_i
47
+ when scanner.scan_full(/([a-z]?[A-Z])/, true, false)
48
+ # found an element
49
+ composition[scanner[1].reverse] += (factor == nil ? 1 : factor)
50
+
51
+ # reset factor to nil
52
+ factor = nil
53
+ else
54
+ raise ParseError.new("could not parse formula: #{chemical_formula}")
55
+ end
56
+ end
57
+ factors = composition_to_factors(composition)
58
+ block_given? ? yield(factors) : EmpiricalFormula.new(factors)
59
+ end
60
+
61
+ # Parses a generalized chemical formula into an EmpiricalFormula.
62
+ # Formula sections can be nested with parentheses, and multiple
63
+ # sections can be added or subtracted within the formula.
64
+ #
65
+ # EmpiricalFormula.parse("H2O").to_s # => "H(2)O"
66
+ # EmpiricalFormula.parse("CH3(CH2)50CH3").to_s # => "C(52)H(106)"
67
+ # EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s # => "C(2)H(4)N(2)"
68
+ #
69
+ # Note that the format for EmpiricalFormula#to_s differs from the
70
+ # format that parse utilizes.
71
+ #
72
+ # To extend the functionality of parse, provide a block to receive
73
+ # formula sections with unexpected punctuation and calculate an
74
+ # EmpiricalFormula therefrom. If the block returns nil,
75
+ # then parse will raise an error.
76
+ #
77
+ # block = lambda do |formula|
78
+ # case formula
79
+ # when /\[(.*)\]/
80
+ # factors = $1.split(/,/).collect {|i| i.strip.to_i }
81
+ # EmpiricalFormula.new(factors)
82
+ # else nil
83
+ # end
84
+ # end
85
+ #
86
+ # EmpiricalFormula.parse("H2O + [2, 1]", &block).to_s # => "H(4)O(2)"
87
+ # EmpiricalFormula.parse("H2O + :not_expected", &block) # !> ParseError
88
+ #
89
+ def parse(chemical_formula, &block)
90
+ # Remove whitespace
91
+ formula = chemical_formula.to_s.gsub(/\s+/, "")
92
+
93
+ # Split and handle multipart formulae
94
+ case formula
95
+ when /\+/
96
+ return formula.split(/\+/).inject(EmpiricalFormula.new) do |current, formula|
97
+ current + parse(formula, &block)
98
+ end
99
+ when /-/
100
+ splits = formula.split(/-/)
101
+ first = parse(splits.shift, &block)
102
+ return splits.inject(first) do |current, formula|
103
+ current - parse(formula, &block)
104
+ end
105
+ when /[^A-Za-z0-9\\(\\)]/
106
+ result = block_given? ? yield(formula) : nil
107
+ return result unless result == nil
108
+
109
+ raise ParseError.new("unexpected characters in formula: #{chemical_formula}")
110
+ end
111
+
112
+ # factor is the number following an element, as 6 and 12 in 'C6H12'
113
+ # factor == nil indicates that a number has not been read for the
114
+ # next element. This state is used later to check for hanging
115
+ # factors, as in '2C6' or (8OH)
116
+ factor = nil
117
+
118
+ # multiplier is the latest cumulative factor for a parenthesis
119
+ # expression. A new multiplier is pushed on the stack for every new
120
+ # parenthesis set, and popped off when the set terminates.
121
+ # ex: for CH3(C(H)2)7CH
122
+ # At the period Integer at the top of the stack equals
123
+ # CH3(C(H)2)7.CH 1
124
+ # CH3(C(H)2.)7CH 7
125
+ # CH3(C(H.)2)7CH 14
126
+ # CH3(C.(H)2)7CH 7
127
+ # CH3.(C(H)2)7CH 1
128
+ multiplier = []
129
+ multiplier << 1
130
+
131
+ # composition will store the formula composition as it is parsed
132
+ composition = Hash.new(0)
133
+
134
+ # Parse elements and factors out of the formula from right to left
135
+ scanner = StringScanner.new(formula.reverse)
136
+ while scanner.restsize > 0
137
+
138
+ case
139
+ when scanner.scan_full(/(\d+)/, true, false)
140
+ # found a factor
141
+ factor = scanner[1].reverse.to_i
142
+ when scanner.scan_full(/([a-z]?[A-Z])/, true, false)
143
+ # found an element
144
+
145
+ # Adjust the factor by the multiplier. If factor == nil
146
+ # then a factor has not been read for the element, as would
147
+ # be seen in NaOH; use 1 in this case instead.
148
+ factor = (factor.nil? ? 1 : factor) * multiplier.last
149
+
150
+ # Add the current factor to composition, remembering to reverse the symbol
151
+ composition[ scanner[1].reverse ] += factor
152
+
153
+ # reset factor to nil
154
+ factor = nil
155
+ when scanner.scan_full(/\)/, true, false)
156
+ # When a parenthesis ends, the current multiplier must be
157
+ # adjusted by the current factor. If factor == nil then a
158
+ # factor has not been read for the parenthesis, use 1 instead
159
+ multiplier << (factor.nil? ? 1 : factor) * multiplier.last
160
+
161
+ # reset factor to nil
162
+ factor = nil
163
+ when scanner.scan_full(/\(/, true, false)
164
+ # When a parenthesis starts, the current multiplier is
165
+ # popped off. Check for hanging factors and that after
166
+ # popping a multiplier will remain. If no multiplier will
167
+ # remain, then the parenthesis must be mismatched
168
+ raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
169
+ raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length > 1
170
+
171
+ multiplier.pop
172
+ else
173
+ raise ParseError.new("could not parse formula: #{chemical_formula}")
174
+ end
175
+ end
176
+
177
+ # Check for hanging factors, that a multiplier remains, and that
178
+ # elements were found during parsing
179
+ raise ParseError.new("the formula contains a hanging factor: #{chemical_formula}") unless factor.nil?
180
+ raise ParseError.new("the formula contains mismatched parenthesis: #{chemical_formula}") unless multiplier.length == 1
181
+ raise ParseError.new("no elements could be found in the formula: #{chemical_formula}") if composition.length == 0 && !formula.empty?
182
+
183
+ EmpiricalFormula.new(composition_to_factors(composition))
184
+ end
185
+
186
+ # Parses the input formula into an EmpiricalFormula and
187
+ # calculates the mass therefrom. By default the mass
188
+ # will be the monoisotopic mass of the formula.
189
+ #
190
+ # See EmpiricalFormula#mass for more details.
191
+ def mass(formula, &block) # :yields: element
192
+ mass = parse(formula).mass(&block)
193
+ end
194
+
195
+ protected
196
+
197
+ # Converts a hash of (symbol, factor) pairs into a factors array,
198
+ # suitable for initializing an EmpiricalFormula.
199
+ def composition_to_factors(composition)
200
+ factors = []
201
+ composition.each_pair do |symbol, factor|
202
+ next if factor == 0
203
+
204
+ element = symbol.kind_of?(Element) ? symbol : Element.index(:symbol)[symbol]
205
+ if element == nil
206
+ raise UnknownElementError.new("unknown element: #{symbol}")
207
+ end
208
+
209
+ factors[ELEMENT_INDEX.index(element)] = factor
210
+ end
211
+ factors
212
+ end
213
+ end
214
+
215
+ class UnknownElementError < StandardError # :nodoc:
216
+ end
217
+
218
+ class ParseError < StandardError # :nodoc:
219
+ end
220
+
221
+ include Enumerable
222
+ include Utils
223
+
224
+ # An array defining the number of a given element in the formula. The
225
+ # order of elements in ELEMENT_INDEX corresponds to the order of formula,
226
+ # such that formula[1] indicates the number of ELEMENT_INDEX[1] elements
227
+ # in self.
228
+ attr_reader :formula
229
+
230
+ def initialize(formula=[], normalize=true)
231
+ @formula = formula
232
+
233
+ if normalize
234
+ # normalize by converting nils to zero and remove trailing zeros
235
+ @formula.collect! {|factor| factor == nil ? 0 : factor}
236
+ @formula.pop while @formula.last == 0
237
+ end
238
+
239
+ # ensure the formula cannot be changed
240
+ @formula.freeze
241
+ end
242
+
243
+ # Returns a new EmpiricalFormula summing the formula of another and self.
244
+ def +(another)
245
+ EmpiricalFormula.new(add(self.formula.dup, another.formula), false)
246
+ end
247
+
248
+ # Returns a new EmpiricalFormula subtracting the formula of another from self.
249
+ def -(another)
250
+ EmpiricalFormula.new(add(self.formula.dup, another.formula, -1), false)
251
+ end
252
+
253
+ # Returns a new EmpiricalFormula multiplying the formula of self by factor.
254
+ def *(factor)
255
+ EmpiricalFormula.new(multiply(self.formula.dup, factor), false)
256
+ end
257
+
258
+ # True if another is an EmpiricalFormula and the formula of another equals the formula of self.
259
+ def ==(another)
260
+ another.kind_of?(EmpiricalFormula) && self.formula == another.formula
261
+ end
262
+
263
+ # Yields each element and the number of times that element occurs in self.
264
+ def each # :yields: element, n
265
+ formula.each_with_index do |n, index|
266
+ next if n == 0
267
+ yield(ELEMENT_INDEX[index], n)
268
+ end
269
+ end
270
+
271
+ # Returns a formula string formatted like 'H(2)O' with the
272
+ # elements sorted alphabetically by symbol.
273
+ def to_s
274
+ collect do |element, n|
275
+ element.symbol + (n == 1 ? "" : "(#{n})")
276
+ end.sort.join('')
277
+ end
278
+
279
+ # Calculates and returns the mass of self using the element
280
+ # masses returned by the block. Returns the monoisotopic mass
281
+ # for the formula (ie the mass calculated from the most abundant
282
+ # natural isotope of each element) if no block is given.
283
+ #
284
+ # water = EmpiricalFormula.new [2,1]
285
+ #
286
+ # # monoisotopic mass calculation
287
+ # water.mass # => 18.0105646863
288
+ # water.mass {|e| e.mass } # => 18.0105646863
289
+ #
290
+ # # average mass calculation
291
+ # water.mass {|e| e.std_atomic_weight.value } # => 18.01528
292
+ #
293
+ # ==== Notes
294
+ # - The definition of monoisotopic mass conforms to
295
+ # that presented in 'Standard Definitions of Terms Relating
296
+ # to Mass Spectrometry, Phil. Price, J. Am. Soc. Mass
297
+ # Spectrom. (1991) 2 336-348'
298
+ # (see {Unimod Mass Help}[http://www.unimod.org/masses.html])
299
+ # - Masses are calculated such that mathematical operations
300
+ # are performed on the return of the block.
301
+ #
302
+ def mass(&block)
303
+ if block_given?
304
+ mass = 0
305
+ each {|e, n| mass = (yield(e) * n) + mass }
306
+ mass
307
+ else
308
+ @monoisotopic_mass ||= mass {|e| e.mass}
309
+ end
310
+ end
311
+
312
+ # An array of all element symbols ordered roughly by their occurrence
313
+ # in common biological molecules (ex water, carbohydrates, proteins).
314
+ ELEMENT_INDEX_ORDER = ['H', 'O', 'C', 'N', 'S', 'P', 'Fe', 'Ni', 'Se']
315
+
316
+ # An array of all elements ordered as in ELEMENT_INDEX_ORDER
317
+ ELEMENT_INDEX = Element.library.collect :element_index do |e|
318
+ unless ELEMENT_INDEX_ORDER.include?(e.symbol)
319
+ ELEMENT_INDEX_ORDER << e.symbol
320
+ end
321
+
322
+ [e, ELEMENT_INDEX_ORDER.index(e.symbol)]
323
+ end
324
+ end
325
+ end