mspire-molecular_formula 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +23 -0
- data/README.md +58 -0
- data/Rakefile +24 -0
- data/lib/mspire/mf.rb +3 -0
- data/lib/mspire/molecular_formula.rb +69 -0
- data/lib/mspire/molecular_formula/aa.rb +79 -0
- data/lib/mspire/molecular_formula/arithmetic.rb +74 -0
- data/lib/mspire/molecular_formula/isotope_distribution.rb +109 -0
- data/lib/mspire/molecular_formula/mass.rb +34 -0
- data/lib/mspire/molecular_formula/reader.rb +56 -0
- data/lib/mspire/molecular_formula/version.rb +5 -0
- data/mspire-molecular_formula.gemspec +39 -0
- data/spec/mspire/mf_spec.rb +10 -0
- data/spec/mspire/molecular_formula/aa_spec.rb +33 -0
- data/spec/mspire/molecular_formula/isotope_distribution_spec.rb +82 -0
- data/spec/mspire/molecular_formula_spec.rb +227 -0
- data/spec/spec_helper.rb +20 -0
- metadata +169 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 8aa1b3d4118e17b3ca2c9092e9c355f6e376a46c
|
|
4
|
+
data.tar.gz: 14d52e1251c536369476ec68bab41f903a366a11
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: ee4729ebc0ff0276ffbd299bdec26a3eea81a05603895c1061b333031c8c22b53c1bac56789734a7e071dd776df728041f1dd67f0ca2ae4f61be6192aca87a77
|
|
7
|
+
data.tar.gz: d6301ed9ef9b458c80b80ea92615c059f6a49bbedc15ed76c1a5f089aac3766cb2d52d7063858bbb4704080fff8ade679bc18049f0d223e44e945c33d8f4ea65
|
data/.gitignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
*.gem
|
|
2
|
+
*.rbc
|
|
3
|
+
.bundle
|
|
4
|
+
.config
|
|
5
|
+
.yardoc
|
|
6
|
+
Gemfile.lock
|
|
7
|
+
InstalledFiles
|
|
8
|
+
_yardoc
|
|
9
|
+
coverage
|
|
10
|
+
doc/
|
|
11
|
+
lib/bundler/man
|
|
12
|
+
pkg
|
|
13
|
+
rdoc
|
|
14
|
+
spec/reports
|
|
15
|
+
test/tmp
|
|
16
|
+
test/version_tmp
|
|
17
|
+
tmp
|
|
18
|
+
*.bundle
|
|
19
|
+
*.so
|
|
20
|
+
*.o
|
|
21
|
+
*.a
|
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Copyright (c) 2014 Brigham Young University
|
|
2
|
+
Author: John T. Prince
|
|
3
|
+
|
|
4
|
+
MIT License
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
7
|
+
a copy of this software and associated documentation files (the
|
|
8
|
+
"Software"), to deal in the Software without restriction, including
|
|
9
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
10
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
11
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
12
|
+
the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be
|
|
15
|
+
included in all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
19
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
21
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
22
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
23
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Mspire::MolecularFormula
|
|
2
|
+
|
|
3
|
+
mspire library to handle molecular formulas (including an optional charge state), complete with relevant chemical properties such as mass, m/z, and isotope distribution.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
gem install mspire-molecular_formula
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
require 'mspire/molecular_formula'
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Create with a hash
|
|
16
|
+
|
|
17
|
+
```ruby
|
|
18
|
+
mf = Mspire::MolecularFormula.new( C:3, H:4, O:2 )
|
|
19
|
+
|
|
20
|
+
# with a +2 charge
|
|
21
|
+
mf = Mspire::MolecularFormula.new( {C:3, H:4, O:2}, 2)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Create with a string formula
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
mf = Mspire::MolecularFormula[ 'C3H4O2' ]
|
|
28
|
+
|
|
29
|
+
# with a +2 charge
|
|
30
|
+
mf = Mspire::MolecularFormula[ 'C3H4O2', 2 ]
|
|
31
|
+
mf = Mspire::MolecularFormula[ 'C3H4O2+2' ] # alternatively
|
|
32
|
+
mf = Mspire::MolecularFormula[ 'C3H4O2++' ] # alternatively
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Arithmetic
|
|
36
|
+
|
|
37
|
+
Walk through the arithmetic of combustion of ethene using this equation:
|
|
38
|
+
|
|
39
|
+
CH2=CH2 + 3 O2 -> 2 CO2 + 2 H2O
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
ethene = Mspire::MolecularFormula['C2H4']
|
|
43
|
+
oxygen = Mspire::MolecularFormula['O2']
|
|
44
|
+
water = Mspire::MolecularFormula['H2O']
|
|
45
|
+
|
|
46
|
+
combustion = ethene + (oxygen*3)
|
|
47
|
+
two_carbon_dioxide = combustion - (water*2)
|
|
48
|
+
carbon_dioxide = two_carbon_dioxide / 2
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Note that there are no methods defined on fixnum to deal with MolecularFormula
|
|
52
|
+
objects, so fixnums need to follow the MolecularFormula (i.e., "3 * oxygen"
|
|
53
|
+
will throw an error but "oxygen * 3" is fine)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
### Isotope distribution

```ruby
require 'mspire/isotope/distribution' # requires fftw gem
butane = Mspire::MolecularFormula['C4H10']
puts butane.isotope_distribution # :total, :max, :first as arg to normalize
```
|
|
58
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require "bundler/gem_tasks"

# NOTE(review): this constant is referenced before the explicit version
# require below; presumably it is already defined because loading
# bundler/gem_tasks evaluates the gemspec -- confirm before reordering.
@module_name = Mspire::MolecularFormula
@gem_name = 'mspire-molecular_formula'
# 'mspire-molecular_formula' => 'mspire/molecular_formula'
@gem_path_name = @gem_name.tr('-', '/')

require "#{@gem_path_name}/version"

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file found under spec/
RSpec::Core::RakeTask.new(:spec) { |spec| spec.pattern = FileList['spec/**/*_spec.rb'] }

task :default => :spec

require 'rdoc/task'

# `rake rdoc` builds API docs from the README and everything under lib/
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "#{@gem_name} #{@module_name.const_get('VERSION')}"
  ['README*', 'lib/**/*.rb'].each { |glob| rdoc.rdoc_files.include(glob) }
end
|
data/lib/mspire/mf.rb
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module Mspire
  # A molecular formula: a Hash of element => count with an attached
  # integer charge state.
  class MolecularFormula < Hash

    # integer describing the charge state
    # mass calculations will add/remove electron mass from this
    attr_accessor :charge

    # Takes a hash of element counts and an optional Integer expressing the
    # charge, e.g.
    #
    #     {H: 22, C: 12, N: 1, O: 3, S: 2}
    #
    # The entries are merged in exactly as given (keys are not normalized
    # here).
    def initialize(hash={}, charge=0)
      @charge = charge
      self.merge!(hash)
    end

    # Returns the formula as a string such as "C3H4O2+2".
    #
    # Elements whose count is not positive are omitted and a count of 1 is
    # left implicit.  When include_charge_if_nonzero is true a non-zero
    # charge is appended as a sign followed by the magnitude (the magnitude
    # is omitted when it is 1).  When alphabetize is true the elements are
    # ordered by the string form of their key, so String and Symbol keys
    # may be mixed; otherwise insertion order is used.
    def to_s(include_charge_if_nonzero=true, alphabetize=true)
      pairs = alphabetize ? sort_by { |el, _cnt| el.to_s } : self
      st = ''
      pairs.each do |el, cnt|
        next unless cnt > 0
        st << el.to_s.capitalize
        st << cnt.to_s if cnt > 1
      end
      if include_charge_if_nonzero && !charge.zero?
        st << (charge > 0 ? '+' : '-')
        st << charge.abs.to_s if charge.abs > 1
      end
      st
    end

    # Hash-style inspect output tagged with the class name and the charge.
    def inspect
      "{MolecularFormula #{super[1...-1]}, @charge=#{self.charge}}"
    end

    # returns a hash (note: does not pass along charge info!)
    def to_h
      Hash[ self ]
    end

    # the plain Hash comparison (element counts only)
    alias_method :old_equal, :==

    # Two formulas are equal when both their element counts and their
    # charges match.  Objects that carry no charge (e.g. a plain Hash) are
    # never equal to a formula; use #to_h to compare element counts alone.
    def ==(other)
      other.respond_to?(:charge) && old_equal(other) && self.charge == other.charge
    end

  end
end
|
|
47
|
+
|
|
48
|
+
require "mspire/molecular_formula/version"
|
|
49
|
+
|
|
50
|
+
# class methods for reading from different string input
|
|
51
|
+
require 'mspire/molecular_formula/reader'
|
|
52
|
+
|
|
53
|
+
# the modules for these 3 are included at the bottom
|
|
54
|
+
require 'mspire/molecular_formula/arithmetic'
|
|
55
|
+
require 'mspire/molecular_formula/mass'
|
|
56
|
+
require 'mspire/molecular_formula/isotope_distribution'
|
|
57
|
+
|
|
58
|
+
module Mspire
  class MolecularFormula
    # class-level behavior comes from Reader
    extend Reader

    # instance-level behaviors are mixed in one at a time, in this order
    [Arithmetic, Mass, IsotopeDistribution].each { |behavior| include behavior }
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require 'mspire/molecular_formula'
|
|
2
|
+
|
|
3
|
+
module Mspire
|
|
4
|
+
class MolecularFormula
|
|
5
|
+
module AA
  # Elemental composition of each individual residue (i.e., without the
  # extra H and OH found on the ends), keyed by one-letter code.
  residue_compositions = {
    'A' => { C: 3, H: 5, O: 1, N: 1 },
    'C' => { C: 3, H: 5, O: 1, N: 1, S: 1 },
    'D' => { C: 4, H: 5, O: 3, N: 1 },
    'E' => { C: 5, H: 7, O: 3, N: 1 },
    'F' => { C: 9, H: 9, O: 1, N: 1 },
    'G' => { C: 2, H: 3, O: 1, N: 1 },
    'I' => { C: 6, H: 11, O: 1, N: 1 },
    'H' => { C: 6, H: 7, O: 1, N: 3 },
    'K' => { C: 6, H: 12, O: 1, N: 2 },
    'L' => { C: 6, H: 11, O: 1, N: 1 },
    'M' => { C: 5, H: 9, O: 1, N: 1, S: 1 },
    'N' => { C: 4, H: 6, O: 2, N: 2 },
    'O' => { C: 12, H: 19, O: 2, N: 3 },
    'P' => { C: 5, H: 7, O: 1, N: 1 },
    'Q' => { C: 5, H: 8, O: 2, N: 2 },
    'R' => { C: 6, H: 12, O: 1, N: 4 },
    'S' => { C: 3, H: 5, O: 2, N: 1 },
    'T' => { C: 4, H: 7, O: 2, N: 1 },
    'U' => { C: 3, H: 5, O: 1, N: 1, Se: 1 },
    'V' => { C: 5, H: 9, O: 1, N: 1 },
    'W' => { C: 11, H: 10, O: 1, N: 2 },
    'Y' => { C: 9, H: 9, O: 2, N: 1 },
  }

  # MolecularFormula for each amino acid residue (no H or OH on the ends),
  # keyed by the one-letter code as a String.  The hashes returned by
  # ::formulas share these same formula objects.
  FORMULAS_STRING = residue_compositions.each_with_object({}) do |(letter, counts), table|
    table[letter] = Mspire::MolecularFormula.new(counts)
  end

  class << self
    # Returns a hash of residue molecular formulas keyed by the amino acid
    # single letter code.
    #
    # options:
    #
    #     :by = :symbol | :string | :both
    #     (:symbol is default)
    #
    # :string hands back FORMULAS_STRING itself; :symbol and :both build a
    # new hash on every call.  Anything else raises ArgumentError.
    def formulas(by: :symbol)
      by_string = Mspire::MolecularFormula::AA::FORMULAS_STRING
      return by_string if by == :string

      unless [:symbol, :both].include?(by)
        raise ArgumentError, ":by must be :symbol, :string, or :both"
      end

      by_symbol = {}
      by_string.each { |letter, formula| by_symbol[letter.to_sym] = formula }

      by == :symbol ? by_symbol : by_string.merge(by_symbol)
    end
  end
end
|
|
64
|
+
|
|
65
|
+
module Reader

  # Builds the formula of a linear peptide: the sum of all the residue
  # formulas plus one water (H2O) for the free termini.
  #
  # aaseq           - String of single letter amino acid codes
  # charge          - Integer charge handed to the new formula
  # aa_formula_hash - residue lookup keyed by single letter String; each
  #                   value is a hash-like object of element => count
  #
  # An empty sequence yields plain water.  Raises ArgumentError when the
  # sequence contains a letter missing from aa_formula_hash.
  def from_aaseq(aaseq, charge=0, aa_formula_hash=Mspire::MolecularFormula::AA::FORMULAS_STRING)
    # default of 0 lets every element be accumulated without a nil check
    counts = Hash.new(0)
    aaseq.each_char do |aa|
      residue = aa_formula_hash[aa]
      raise ArgumentError, "unknown amino acid: #{aa.inspect}" unless residue
      residue.each { |el, cnt| counts[el] += cnt }
    end
    # the water that caps the N and C termini
    counts[:H] += 2
    counts[:O] += 1
    self.new(counts, charge)
  end
end
|
|
77
|
+
|
|
78
|
+
end # molecular_formula
|
|
79
|
+
end # mspire
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module Mspire
  class MolecularFormula
    # Arithmetic on formulas.  The including class must behave like a Hash
    # of element => count and expose a read/write #charge.  Operands handed
    # to the add/subtract methods must respond to #charge as well.
    module Arithmetic
      # returns a new formula object where all the atoms have been added up
      def +(*others)
        self.dup.add!(*others)
      end

      # Adds the element counts and charge of each of the others to the
      # receiver in place.
      #
      # returns self
      def add!(*others)
        others.each do |other|
          self.merge!(other) { |_el, mine, theirs| mine + theirs }
          self.charge += other.charge
        end
        self
      end

      # returns a new formula object where all the formulas have been subtracted
      # from the caller
      def -(*others)
        self.dup.sub!(*others)
      end

      # Subtracts the element counts and charge of each of the others from
      # the receiver in place.  An element the receiver lacks ends up with
      # a negative count.
      #
      # returns self
      def sub!(*others)
        others.each do |other|
          other.each do |el, cnt|
            self[el] = key?(el) ? self[el] - cnt : -cnt
          end
          self.charge -= other.charge
        end
        self
      end

      # returns a new formula object with every count (and the charge)
      # multiplied by int
      def *(int)
        self.dup.mul!(int)
      end

      # Multiplies every element count by int in place; the charge is
      # multiplied too unless also_do_charge is false.
      #
      # Raises ArgumentError unless int is an Integer.  returns self
      def mul!(int, also_do_charge=true)
        raise ArgumentError, "must be an integer" unless int.is_a?(Integer)
        keys.each { |el| self[el] *= int }
        self.charge *= int if also_do_charge
        self
      end

      # returns a new formula object with every count (and the charge)
      # divided by int
      def /(int)
        self.dup.div!(int)
      end

      # Divides every element count by int in place; the charge is divided
      # too unless also_do_charge is false.
      #
      # Raises ArgumentError unless int is an Integer, or when any count
      # (or the charge, if it is being divided) is not evenly divisible by
      # int.  Everything is validated before anything is changed, so a
      # failed call leaves the receiver untouched.  Division by zero
      # surfaces as the ZeroDivisionError raised by divmod.
      #
      # returns self
      def div!(int, also_do_charge=true)
        raise ArgumentError, "must be an integer" unless int.is_a?(Integer)

        quotients = map do |el, cnt|
          quotient, modulus = cnt.divmod(int)
          raise ArgumentError, "all numbers must be divisible by int" unless modulus == 0
          [el, quotient]
        end

        if also_do_charge
          charge_quotient, modulus = self.charge.divmod(int)
          raise ArgumentError, "charge must be divisible by int" unless modulus == 0
        end

        quotients.each { |el, quotient| self[el] = quotient }
        self.charge = charge_quotient if also_do_charge
        self
      end

    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require 'mspire/mass'
|
|
2
|
+
require 'mspire/isotope'
|
|
3
|
+
|
|
4
|
+
require 'fftw3'
|
|
5
|
+
|
|
6
|
+
module Mspire
  class MolecularFormula
    # Isotope distribution calculations for a formula.  The including
    # class must behave like a Hash of element => count and respond to
    # #charge.
    module IsotopeDistribution
      NORMALIZE = :total

      # Returns isotopic distribution beginning with the lightest possible peak.
      # (for most molecules this will also be the monoisotopic peak)
      #
      # Two cutoff protocols may be specified, percent_cutoff or
      # peak_cutoff.  Normalization is performed *after* cutoff.
      #
      #     percent_cutoff: cuts off when no more peaks contribute more than percent_cutoff
      #                         to the total distribution.
      #     peak_cutoff:    cuts off after that many peaks.
      #
      # prefer_lowest_index controls the behavior if both percent_cutoff and
      # peak_cutoff are specified.  If true, then the lowest index found between
      # the two methods will be used, otherwise the highest index.
      #
      # all values will be fractional.  normalize may be one of:
      #
      #     :total   normalize to the total intensity
      #     :max     normalize to the highest peak intensity
      #     :first   normalize to the intensity of the first peak
      #             (this is typically the monoisotopic peak)
      #
      # Raises ArgumentError for any other normalize value.
      def isotope_intensity_distribution(normalize: NORMALIZE, peak_cutoff: nil, percent_cutoff: nil, prefer_lowest_index: true, isotope_table: Mspire::Isotope::BY_ELEMENT)
        unless [:total, :max, :first].include?(normalize)
          raise ArgumentError, "normalize must be :total, :max, or :first"
        end

        mono_dist = raw_isotope_distribution(isotope_table: isotope_table)

        # one past the last peak that still contributes at least
        # percent_cutoff percent of the untrimmed signal (0 if none does)
        percent_index =
          if percent_cutoff
            total_signal = mono_dist.reduce(:+)
            threshold = percent_cutoff / 100.0
            last_kept = mono_dist.rindex { |intensity| (intensity / total_signal) >= threshold }
            last_kept ? (last_kept + 1) : 0
          end

        candidates = [percent_index, peak_cutoff].compact
        # mono_dist.size will result in nothing sliced off (i.e., for no cutoff)
        cutoff_index = (prefer_lowest_index ? candidates.min : candidates.max) || mono_dist.size

        mono_dist.slice!(cutoff_index..-1)

        # normalization, always computed from the peaks that survived the cutoff
        norm_by =
          case normalize
          when :total then mono_dist.reduce(:+)
          when :max   then mono_dist.max
          when :first then mono_dist.first
          end

        mono_dist.map do |intensity|
          v = intensity / norm_by
          # anything that is not positive is reported as 0
          (v > 0) ? v : 0
        end
      end

      # returns an array of two arrays: mass values (or m/z values if charged)
      # and intensity values.  Arguments are passed directly to
      # isotope_intensity_distribution.  If the molecule has a charge, this
      # will be used to adjust the m/z values (by removing or adding
      # electrons to the m/z and as the z)
      #
      # Options are captured as keywords and forwarded as keywords so the
      # call works the same with and without keyword argument separation.
      #
      # NOTE(review): the monoisotopic mass is always looked up in
      # Mspire::Isotope::BY_ELEMENT, even when a different isotope_table is
      # given -- confirm that is intended.
      def isotope_distribution(*args, **options)
        intensities = isotope_intensity_distribution(*args, **options)
        mono = self.map {|el,cnt| Mspire::Isotope::BY_ELEMENT[el].find(&:mono).atomic_mass*cnt }.reduce(:+)
        masses = Array.new(intensities.size)
        neutron = Mspire::Mass::NEUTRON
        # successive peaks are spaced one neutron mass apart
        masses[0] = mono
        (1...masses.size).each {|i| masses[i] = masses[i-1] + neutron }
        if self.charge && self.charge != 0
          masses.map! do |mass|
            (mass - (self.charge * Mspire::Mass::ELECTRON)) / self.charge
          end
        end
        [masses, intensities]
      end

      # returns relative ratios from low nominal mass to high nominal mass.
      # These are *not* normalized at all.
      #
      # isotope_table maps each element key used by the formula to its
      # list of isotopes.
      # NOTE(review): the first/last entries of each list are taken as the
      # lightest/heaviest isotope -- confirm the table is ordered by mass
      # number.
      def raw_isotope_distribution(isotope_table: Mspire::Isotope::BY_ELEMENT)
        low_nominal = 0
        high_nominal = 0
        self.each do |el,cnt|
          isotopes = isotope_table[el]
          low_nominal += (isotopes.first.mass_number * cnt)
          high_nominal += (isotopes.last.mass_number * cnt)
        end

        # one abundance vector per element, indexed by mass number, is
        # transformed and raised to the power of the atom count
        ffts = self.map do |el, cnt|
          isotope_el_ar = NArray.float(high_nominal+1)
          isotope_table[el].each do |isotope|
            isotope_el_ar[isotope.mass_number] = isotope.relative_abundance
          end
          FFTW3.fft(isotope_el_ar)**cnt
        end
        # the product of the transforms is inverted and only the nominal
        # masses the molecule can actually take are kept
        FFTW3.ifft(ffts.reduce(:*)).real.to_a[low_nominal..high_nominal]
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
require 'mspire/mass'
|
|
2
|
+
|
|
3
|
+
module Mspire
  class MolecularFormula
    # Mass calculations for a formula.  The including class must behave
    # like a Hash of element => count and respond to #charge.
    module Mass
      # gives the monoisotopic mass adjusted by the current charge (i.e.,
      # adds/subtracts electron masses for the charges)
      #
      # Pass false to leave the electron masses out.  Raises ArgumentError
      # if an element has no entry in Mspire::Mass::Element::MONO_STRING.
      def mass(consider_electron_masses = true)
        sum_element_masses(Mspire::Mass::Element::MONO_STRING, consider_electron_masses)
      end

      # Same as #mass, but uses the per-element values found in
      # Mspire::Mass::Element::AVG_STRING.
      def avg_mass(consider_electron_masses = true)
        sum_element_masses(Mspire::Mass::Element::AVG_STRING, consider_electron_masses)
      end

      # the mass to charge ratio (m/z)
      # returns nil if the charge == 0
      #
      # With negative_mz_allowed (the default) the mass is divided by the
      # signed charge, so anions give a negative value; otherwise the
      # absolute charge is used.
      def mz(consider_electron_masses = true, negative_mz_allowed = true)
        return nil if charge == 0
        mass(consider_electron_masses) / (negative_mz_allowed ? charge : charge.abs)
      end

      private

      # Sums count * per-atom mass over every element, looking the masses
      # up in mass_table by the string form of the element key, then
      # removes one electron mass per unit of charge when requested.
      def sum_element_masses(mass_table, consider_electron_masses)
        total = inject(0.0) do |sum, (el, cnt)|
          per_atom = mass_table[el.to_s]
          raise ArgumentError, "no mass known for element #{el.inspect}" unless per_atom
          sum + (per_atom * cnt)
        end
        total -= (Mspire::Mass::ELECTRON * charge) if consider_electron_masses
        total
      end
    end
  end
end
|
|
34
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
require 'mspire/molecular_formula/aa'
|
|
2
|
+
|
|
3
|
+
module Mspire
  class MolecularFormula
    # Class-level constructors.  The extending class must accept
    # new(hash, charge) and its instances must support Hash-style
    # []= and fetch.
    module Reader
      # returns the formula portion and the charge portion (signed Int) of a string
      # returns nil for charge if no charge specified.
      #   e.g. C2H4+3 => ['C2H4', 3]
      #   e.g. C2H4+++ => ['C2H4', 3]
      #   e.g. C2H4- => ['C2H4', -1]
      #
      # The first character of the sign run decides the sign.  When digits
      # follow they give the magnitude; otherwise the magnitude is the
      # number of times that first sign character occurs in the run.
      def formula_and_charge(string)
        md = string.match(/([^+-]*)([\+-]+)(\d*)\Z/)
        return [string, nil] unless md

        signs, digits = md[2], md[3]
        sign = signs[0]
        magnitude = digits.empty? ? signs.count(sign) : digits.to_i
        [md[1], sign == '-' ? -magnitude : magnitude]
      end


      # takes a string, with properly capitalized elements making up the
      # formula.  The elements may be in any order.  A charge (e.g., +2, +, -,
      # -3 may be affixed to the end ).  An explicit charge argument wins
      # over a charge written in the string.
      #
      # An element that appears more than once (e.g. 'CH3CH2OH') has its
      # counts added together.
      def from_string(arg, charge=nil)
        (mol_form_str, chrg_from_str) = formula_and_charge(arg)
        mf = self.new({}, charge || chrg_from_str || 0)
        mol_form_str.scan(/([A-Z][a-z]?)(\d*)/).each do |el, cnt|
          key = el.to_sym
          # a missing count means a single atom
          mf[key] = mf.fetch(key, 0) + (cnt == '' ? 1 : cnt.to_i)
        end
        mf
      end

      # arg may be a String, Hash, or MolecularFormula object.
      #
      # For a String see from_string.  Otherwise the entries of arg are
      # copied into a new formula whose charge is the explicit charge
      # argument when given, else arg's own charge if it has one, else 0.
      def from_any(arg, charge=nil)
        if arg.is_a?(String)
          from_string(arg, charge)
        else
          self.new(arg, charge || (arg.respond_to?(:charge) ? arg.charge : 0))
        end
      end
      alias_method :[], :from_any

    end
  end
end
|
|
55
|
+
|
|
56
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'mspire/molecular_formula/version'
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |spec|
  spec.name          = "mspire-molecular_formula"
  spec.version       = Mspire::MolecularFormula::VERSION
  spec.authors       = ["John T. Prince"]
  spec.email         = ["jtprince@gmail.com"]
  spec.summary       = %q{mspire library to handle molecular formulas (including an optional charge state)}
  spec.description   = %q{mspire library to handle molecular formulas (including an optional charge state), complete with relevant chemical properties such as mass, m/z, and isotope distribution.}
  spec.homepage      = ""
  spec.license       = "MIT"

  # everything tracked by git ships with the gem
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # runtime dependencies
  spec.add_dependency("mspire-mass", "~> 0.1.0") # which brings in mspire-isotope

  # development dependencies
  spec.add_development_dependency("bundler", "~> 1.6.2")
  spec.add_development_dependency("rake")
  spec.add_development_dependency("rspec", "~> 2.14.1")
  spec.add_development_dependency("rdoc", "~> 4.1.1")
  spec.add_development_dependency("simplecov", "~> 0.8.2")
  # here because bad microsoft OS support
  # TODO: implement w/o FFTW
  spec.add_development_dependency("fftw3", "~> 0.3")
end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'mspire/mf'
|
|
4
|
+
|
|
5
|
+
describe 'require "mspire/mf" to get Mspire::MF shorthand' do
  specify 'Mspire::MF allows convenient access to MolecularFormula stuff' do
    # the shorthand supports the same arithmetic as the full class name
    product = Mspire::MF['H2O'] + Mspire::MF['C2H4']
    product.to_h.should == { :H=>6, :O=>1, :C=>2 }

    # a computed float is compared with a tolerance, not bit for bit
    Mspire::MF['H2O+'].mass.should be_within(1e-9).of(18.010016083700002)
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'mspire/molecular_formula/aa'
|
|
4
|
+
|
|
5
|
+
describe Mspire::MolecularFormula::AA do
  # composition of the alanine residue, the first entry of the table
  alanine = { :C=>3, :H=>5, :O=>1, :N=>1 }

  specify '::FORMULAS_STRING holds molecular formulas keyed by AA string' do
    by_string = Mspire::MolecularFormula::AA::FORMULAS_STRING
    by_string.size.should == 22
    by_string.values.each {|mf| mf.should be_a(Mspire::MolecularFormula) }
    by_string.values.first.to_h.should == alanine
    by_string['A'].to_h.should == alanine
    by_string.keys.all? {|key| key.is_a?(String) }.should be_true
  end

  specify '::formulas returns them by symbol or string or both (symbol by default)' do
    # default: symbol keys
    by_symbol = Mspire::MolecularFormula::AA.formulas
    by_symbol.size.should == 22
    by_symbol.values.each {|mf| mf.should be_a(Mspire::MolecularFormula) }
    by_symbol.values.first.to_h.should == alanine
    by_symbol.keys.all? {|key| key.is_a?(Symbol) }.should be_true
    by_symbol[:A].to_h.should == alanine

    # string keys only
    by_string = Mspire::MolecularFormula::AA.formulas(by: :string)
    by_string.keys.all? {|key| key.is_a?(String) }.should be_true
    by_string['A'].to_h.should == alanine

    # both kinds of key in a single hash
    by_both = Mspire::MolecularFormula::AA.formulas(by: :both)
    by_both.keys.any? {|key| key.is_a?(Symbol) }.should be_true
    by_both.keys.any? {|key| key.is_a?(String) }.should be_true
    by_both['A'].to_h.should == alanine
    by_both[:A].to_h.should == alanine
  end
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# in this case we need to pull in mol...form... first or we'll get it behaving
|
|
4
|
+
# like a proper hash with ::[] since ::[] hasn't been overridden!
|
|
5
|
+
require 'mspire/molecular_formula'
|
|
6
|
+
|
|
7
|
+
describe 'Mspire::Isotope::Distribution class methods' do

  # every value of a_dist must lie within 1e-9 of its partner in b_dist
  def similar_distributions(a_dist, b_dist)
    b_dist.zip(a_dist) do |expected, actual|
      expect(actual).to be_within(1e-9).of expected
    end
  end

  before(:all) do
    @nist = Mspire::Isotope::NIST::BY_ELEMENT
    @norm = :total
    @pcut = nil # percent cutoff
  end

  before do
    @first = [1.0, 0.08919230588715311, 0.017894161377222138, 0.0013573997600723345, 0.0001398330738144181]
  end

  # can also be used on a real MolecularFormula object
  subject { Mspire::MolecularFormula['C102H120O15'] }

  describe 'normalizing isotope distributions' do

    it 'defaults to normalizing by total signal with no cutoff' do
      dist = subject.isotope_intensity_distribution(normalize: @norm, percent_cutoff: @pcut, isotope_table: @nist )
      expect(dist.size).to eq(253)
      similar_distributions dist[0,5], [0.31740518639058685, 0.35635707398291416, 0.20793431846543858, 0.08373257192958428, 0.026084566135229446]
    end

    it 'can normalize by first peak' do
      dist = subject.isotope_intensity_distribution(normalize: :first, percent_cutoff: @pcut, isotope_table: @nist )
      expect(dist.size).to eq(253)
      expect(dist[0]).to eq(1.0)
      expect(dist[1]).not_to eq(1.0)
    end

    it 'can normalize by the max peak' do
      dist = subject.isotope_intensity_distribution(normalize: :max, percent_cutoff: @pcut, isotope_table: @nist )
      expect(dist.size).to eq(253)
      expect(dist[0]).not_to eq(1.0)
      expect(dist[1]).to eq(1.0)
    end

    it 'can cutoff based on percent of total signal' do
      everything_cut = subject.isotope_intensity_distribution(normalize: :max, percent_cutoff: 100, isotope_table: @nist)
      expect(everything_cut).to eq([])

      at_twenty = subject.isotope_intensity_distribution(normalize: :max, percent_cutoff: 20, isotope_table: @nist)
      similar_distributions at_twenty, [0.8906942209481861, 1.0, 0.5834999040187656]

      at_five = subject.isotope_intensity_distribution(normalize: :max, percent_cutoff: 5, isotope_table: @nist)
      similar_distributions at_five, [0.8906942209481861, 1.0, 0.5834999040187656, 0.23496817670469172]

      tiny_cutoff = subject.isotope_intensity_distribution( normalize: :max, percent_cutoff: 0.0001, isotope_table: @nist)
      expect(tiny_cutoff.size).to eq(11)
    end

    it 'can cutoff based on a given number of peaks' do
      no_peaks = subject.isotope_intensity_distribution(normalize: :max, peak_cutoff: 0, isotope_table: @nist)
      expect(no_peaks).to eq([])

      four_peaks = subject.isotope_intensity_distribution(normalize: :total, peak_cutoff: 4, isotope_table: @nist)
      similar_distributions four_peaks, [0.3287710818944283, 0.3691177894299527, 0.2153801947039964, 0.08673093397162249]

      expect(subject.isotope_intensity_distribution(normalize: :max, peak_cutoff: 1, isotope_table: @nist)).to eql([1.0])
    end
    #xspecify 'prefers the lowest of cutoffs' ## need to test
  end

  describe "calculating an isotope distribution (yielding masses/mz's and intensities)" do

    it 'gives neutral masses if no charge' do
      (mzs, intensities) = subject.isotope_distribution(normalize: @norm, percent_cutoff: @pcut, isotope_table: @nist )
      [mzs, intensities].each {|ar| expect(ar.size).to eq(253) }
      expect(mzs[0,5]).to eq([1584.8627231418, 1585.8713880574, 1586.8800529730001, 1587.8887178886002, 1588.8973828042003])
      similar_distributions intensities[0,5], [0.31740518639058685, 0.35635707398291416, 0.20793431846543858, 0.08373257192958428, 0.026084566135229446]
    end

    it 'gives proper m/z values if the molecule is charged' do
      subject.charge = -3
      (mzs, ints) = subject.isotope_distribution(normalize: @norm, percent_cutoff: @pcut, isotope_table: @nist )
      [mzs, ints].each {|ar| expect(ar.size).to eq(253) }
      expect(mzs[0,5]).to eq([-528.2881229806, -528.6243446191334, -528.9605662576668, -529.2967878962, -529.6330095347334])
    end
  end
end
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
require 'mspire/molecular_formula'
|
|
4
|
+
|
|
5
|
+
MF = Mspire::MolecularFormula
|
|
6
|
+
describe Mspire::MolecularFormula do
|
|
7
|
+
|
|
8
|
+
describe 'initialization' do
|
|
9
|
+
|
|
10
|
+
it 'is initialized with Hash' do
|
|
11
|
+
data = {H: 22, C: 12, N: 1, O: 3, S: 2}
|
|
12
|
+
mf = MF.new(data)
|
|
13
|
+
mf.to_h.should == {:H=>22, :C=>12, :N=>1, :O=>3, :S=>2}
|
|
14
|
+
mf.to_h.should == data
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it 'can be initialized with charge, too' do
|
|
18
|
+
mf = MF["H22BeC12N1O3S2Li2", 2]
|
|
19
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
20
|
+
mf.charge.should == 2
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'from_string or ::[] to make from a capitalized string formula' do
|
|
24
|
+
MF.from_string("H22BeC12N1O3S2Li2").to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
25
|
+
|
|
26
|
+
mf = MF['Ni7Se3', 1]
|
|
27
|
+
mf.charge.should == 1
|
|
28
|
+
mf.to_h.should == {:Ni=>7, :Se=>3}
|
|
29
|
+
|
|
30
|
+
# there is no such thing as the E element, so this is going to get the
|
|
31
|
+
# user in trouble. However, this is the proper interpretation of the
|
|
32
|
+
# formula.
|
|
33
|
+
mf = MF['Ni7SE3']
|
|
34
|
+
mf.charge.should == 0
|
|
35
|
+
mf.to_h.should == {:Ni=>7, :S=>1, :E=>3}
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
specify 'Mspire::MolecularFormula.from_aaseq(seq) from an amino acid sequence' do
|
|
39
|
+
mf = MF.from_aaseq("ACDEFGIHKLMNOPQRSTUVWY")
|
|
40
|
+
# checked to be correct with http://web.expasy.org/protparam/
|
|
41
|
+
mf.to_h.should == {:C=>122, :H=>183, :O=>33, :N=>33, :S=>2, :Se=>1}
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
specify 'Mspire::MolecularFormula.from_aaseq(seq, charge) from an amino acid sequence with charge' do
|
|
45
|
+
mf = MF.from_aaseq("ACDEFGIHKLMNOPQRSTUVWY", 3)
|
|
46
|
+
mf.to_h.should == {:C=>122, :H=>183, :O=>33, :N=>33, :S=>2, :Se=>1}
|
|
47
|
+
mf.charge.should == 3
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it 'from_string or ::[] to make from a capitalized string formula with charge attached' do
|
|
51
|
+
mf = MF.from_string("H22BeC12N1O3S2Li2+")
|
|
52
|
+
mf.charge.should == 1
|
|
53
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
54
|
+
|
|
55
|
+
mf = MF.from_string("H22BeC12N1O3S2Li2++++")
|
|
56
|
+
mf.charge.should == 4
|
|
57
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
58
|
+
|
|
59
|
+
mf = MF.from_string("H22BeC12N1O3S2Li2+4")
|
|
60
|
+
mf.charge.should == 4
|
|
61
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
62
|
+
|
|
63
|
+
mf = MF.from_string("H22BeC12N1O3S2Li2-")
|
|
64
|
+
mf.charge.should == -1
|
|
65
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
66
|
+
|
|
67
|
+
mf = MF.from_string("H22BeC12N1O3S2Li2-3")
|
|
68
|
+
mf.charge.should == -3
|
|
69
|
+
mf.to_h.should == {:H=>22, :Be=>1, :C=>12, :N=>1, :O=>3, :S=>2, :Li=>2}
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
describe 'conversion (to_s and to_h)' do
|
|
74
|
+
|
|
75
|
+
subject {
|
|
76
|
+
data = {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
|
|
77
|
+
MF.new(data)
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
specify '#to_s a standard molecular formula, alphabetized by default' do
|
|
81
|
+
subject.to_s.should == "BeC12H22NO3S2"
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
specify '#to_s contains the charge by default' do
|
|
85
|
+
subject.charge = 3
|
|
86
|
+
subject.to_s.should == "BeC12H22NO3S2+3"
|
|
87
|
+
subject.charge = -3
|
|
88
|
+
subject.to_s.should == "BeC12H22NO3S2-3"
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
specify '#to_s(false) turns off charge' do
|
|
92
|
+
subject.charge = 3
|
|
93
|
+
subject.to_s(false).should == "BeC12H22NO3S2"
|
|
94
|
+
subject.charge = -3
|
|
95
|
+
subject.to_s(false).should == "BeC12H22NO3S2"
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Checks the unsorted rendering: the expected string lists the elements in the
# same order as the Hash literal used to build `subject` (H, C, N, O, S, Be),
# followed by the charge suffix.
specify '#to_s(true, false) does not sort' do
  subject.charge = 2
  # The comparison has to go through `.should`; a bare `==` is evaluated and
  # thrown away, which would let this example pass no matter what #to_s returns.
  subject.to_s(true, false).should == "H22C12NO3S2Be+2"
end
|
|
102
|
+
|
|
103
|
+
specify '#to_h converts to a hash' do
|
|
104
|
+
subject.charge = 2
|
|
105
|
+
subject.to_h.should == {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
describe 'equality' do
|
|
110
|
+
subject {
|
|
111
|
+
data = {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
|
|
112
|
+
MF.new(data)
|
|
113
|
+
}
|
|
114
|
+
it 'is only equal if the charge is equal' do
|
|
115
|
+
another = subject.dup
|
|
116
|
+
another.should == subject
|
|
117
|
+
another.charge = 2
|
|
118
|
+
another.should_not == subject
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
describe 'arithmetic' do
|
|
123
|
+
subject {
|
|
124
|
+
data = {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
|
|
125
|
+
MF.new(data, 2)
|
|
126
|
+
}
|
|
127
|
+
it 'can do non-destructive arithmetic' do
|
|
128
|
+
orig = subject.dup
|
|
129
|
+
reply = subject + MF["H2C3P2", 2]
|
|
130
|
+
reply.to_h.should == {H: 24, C: 15, N: 1, O: 3, S: 2, Be: 1, P: 2}
|
|
131
|
+
reply.charge.should == 4
|
|
132
|
+
subject.should == orig
|
|
133
|
+
|
|
134
|
+
reply = subject - MF["H2C3P2", 2]
|
|
135
|
+
reply.to_h.should == {H: 20, C: 9, N: 1, O: 3, S: 2, Be: 1, P: -2}
|
|
136
|
+
reply.charge.should == 0
|
|
137
|
+
subject.should == orig
|
|
138
|
+
|
|
139
|
+
by2 = subject * 2
|
|
140
|
+
by2.to_h.should == {H: 44, C: 24, N: 2, O: 6, S: 4, Be: 2}
|
|
141
|
+
by2.charge.should == 4
|
|
142
|
+
subject.should == orig
|
|
143
|
+
|
|
144
|
+
reply = by2 / 2
|
|
145
|
+
reply.to_h.should == {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
|
|
146
|
+
reply.charge.should == 2
|
|
147
|
+
subject.should == orig
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Exercises the in-place operators (sub!, add!, mul!, div!). Each call's return
# value is chained with #to_h, and `subject` is compared against a copy taken
# before any mutation to show that the receiver itself changed.
it 'can do destructive arithmetic' do
  orig = subject.dup
  subject.sub!(MF["H2C3"]).to_h.should == {H: 20, C: 9, N: 1, O: 3, S: 2, Be: 1}
  subject.should_not == orig
  subject.add!(MF["H2C3"]).to_h.should == {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
  subject.should == orig

  by2 = subject.mul!(2)
  subject.should_not == orig
  by2.to_h.should == {H: 44, C: 24, N: 2, O: 6, S: 4, Be: 2}
  by2.div!(2).to_h.should == {H: 22, C: 12, N: 1, O: 3, S: 2, Be: 1}
  # Compare hash with hash, like every other #to_h expectation in this file,
  # instead of comparing a plain Hash against the formula object.
  by2.to_h.should == orig.to_h
end
|
|
163
|
+
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
describe 'reading in a formula and charge from a string' do
|
|
167
|
+
subject { MF }
|
|
168
|
+
specify 'Mspire::MolecularFormula.formula_and_charge' do
|
|
169
|
+
subject.formula_and_charge( 'C2H4+3' ).should == ['C2H4', 3]
|
|
170
|
+
subject.formula_and_charge( 'C2H4+++' ).should == ['C2H4', 3]
|
|
171
|
+
subject.formula_and_charge( 'C2H4-').should == ['C2H4', -1]
|
|
172
|
+
subject.formula_and_charge( 'C2H4-2').should == ['C2H4', -2]
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
describe 'mass and mz' do
|
|
177
|
+
# (for all these, checked to make sure in close ballpark, but not
|
|
178
|
+
# necessarily exact, unless otherwise stated)
|
|
179
|
+
|
|
180
|
+
before do
|
|
181
|
+
@exact = 65.02654910101
|
|
182
|
+
@avg = 65.07332
|
|
183
|
+
@e = 0.0005486 # set with -> Mspire::Mass::ELECTRON
|
|
184
|
+
@exact_plus_2e = @exact + (2*@e)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
subject {
|
|
188
|
+
data = {H: 3, C: 4, N: 1}
|
|
189
|
+
MF.new(data, -2)
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
specify '#mass (of an uncharged molecule) -> the exact mass' do
|
|
193
|
+
subject.charge = 0
|
|
194
|
+
subject.mass.should == @exact # BMRB databank says: 65.0265491015
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
specify '#mass -> the exact mass (adjusts for electrons)' do
|
|
198
|
+
subject.mass.should == @exact_plus_2e
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
specify '#mass (no charge adjustment)' do
|
|
202
|
+
subject.mass(false).should == @exact # BMRB databank says: 65.0265491015
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
specify '#avg_mass' do
|
|
206
|
+
subject.avg_mass.should == (@avg + 2*@e)
|
|
207
|
+
# changes the value
|
|
208
|
+
subject.charge = 0
|
|
209
|
+
subject.avg_mass.should == @avg # BMRB databank says: 65.073320
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
specify '#mz -> the m/z ratio' do
|
|
213
|
+
subject.mz.should == (@exact_plus_2e / -2.0)
|
|
214
|
+
subject.charge = +2
|
|
215
|
+
subject.mz.should == ((@exact - 2*@e) / 2.0)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
specify '#mz(true, false) will only yield positive m/z ratio' do
|
|
219
|
+
subject.mz(true, false).should == (@exact_plus_2e / 2.0)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
specify '#mz(false, true) will not consider electrons in mass determination' do
|
|
223
|
+
subject.mz(false, true).should == (@exact / -2.0)
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require 'simplecov'
|
|
2
|
+
SimpleCov.start
|
|
3
|
+
|
|
4
|
+
require 'rspec'
|
|
5
|
+
|
|
6
|
+
TESTFILES = File.dirname(__FILE__) + '/testfiles'
|
|
7
|
+
|
|
8
|
+
# Requires supporting files with custom matchers and macros, etc,
|
|
9
|
+
# in ./support/ and its subdirectories.
|
|
10
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
|
11
|
+
|
|
12
|
+
RSpec.configure do |config|
|
|
13
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
|
14
|
+
config.color_enabled = true
|
|
15
|
+
config.tty = true
|
|
16
|
+
config.formatter = :documentation # :progress, :html, :textmate
|
|
17
|
+
#config.formatter = :progress # :progress, :html, :textmate
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
|
metadata
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: mspire-molecular_formula
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- John T. Prince
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-08-02 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: mspire-mass
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.1.0
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 0.1.0
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: bundler
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 1.6.2
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 1.6.2
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rake
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rspec
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 2.14.1
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 2.14.1
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: rdoc
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 4.1.1
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 4.1.1
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: simplecov
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: 0.8.2
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: 0.8.2
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: fftw3
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0.3'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0.3'
|
|
111
|
+
description: mspire library to handle molecular formulas (including an optional charge
|
|
112
|
+
state), complete with relevant chemical properties such as mass, m/z, and isotope
|
|
113
|
+
distribution.
|
|
114
|
+
email:
|
|
115
|
+
- jtprince@gmail.com
|
|
116
|
+
executables: []
|
|
117
|
+
extensions: []
|
|
118
|
+
extra_rdoc_files: []
|
|
119
|
+
files:
|
|
120
|
+
- ".gitignore"
|
|
121
|
+
- Gemfile
|
|
122
|
+
- LICENSE.txt
|
|
123
|
+
- README.md
|
|
124
|
+
- Rakefile
|
|
125
|
+
- lib/mspire/mf.rb
|
|
126
|
+
- lib/mspire/molecular_formula.rb
|
|
127
|
+
- lib/mspire/molecular_formula/aa.rb
|
|
128
|
+
- lib/mspire/molecular_formula/arithmetic.rb
|
|
129
|
+
- lib/mspire/molecular_formula/isotope_distribution.rb
|
|
130
|
+
- lib/mspire/molecular_formula/mass.rb
|
|
131
|
+
- lib/mspire/molecular_formula/reader.rb
|
|
132
|
+
- lib/mspire/molecular_formula/version.rb
|
|
133
|
+
- mspire-molecular_formula.gemspec
|
|
134
|
+
- spec/mspire/mf_spec.rb
|
|
135
|
+
- spec/mspire/molecular_formula/aa_spec.rb
|
|
136
|
+
- spec/mspire/molecular_formula/isotope_distribution_spec.rb
|
|
137
|
+
- spec/mspire/molecular_formula_spec.rb
|
|
138
|
+
- spec/spec_helper.rb
|
|
139
|
+
homepage: ''
|
|
140
|
+
licenses:
|
|
141
|
+
- MIT
|
|
142
|
+
metadata: {}
|
|
143
|
+
post_install_message:
|
|
144
|
+
rdoc_options: []
|
|
145
|
+
require_paths:
|
|
146
|
+
- lib
|
|
147
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
|
+
requirements:
|
|
149
|
+
- - ">="
|
|
150
|
+
- !ruby/object:Gem::Version
|
|
151
|
+
version: '0'
|
|
152
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
|
+
requirements:
|
|
154
|
+
- - ">="
|
|
155
|
+
- !ruby/object:Gem::Version
|
|
156
|
+
version: '0'
|
|
157
|
+
requirements: []
|
|
158
|
+
rubyforge_project:
|
|
159
|
+
rubygems_version: 2.2.2
|
|
160
|
+
signing_key:
|
|
161
|
+
specification_version: 4
|
|
162
|
+
summary: mspire library to handle molecular formulas (including an optional charge
|
|
163
|
+
state)
|
|
164
|
+
test_files:
|
|
165
|
+
- spec/mspire/mf_spec.rb
|
|
166
|
+
- spec/mspire/molecular_formula/aa_spec.rb
|
|
167
|
+
- spec/mspire/molecular_formula/isotope_distribution_spec.rb
|
|
168
|
+
- spec/mspire/molecular_formula_spec.rb
|
|
169
|
+
- spec/spec_helper.rb
|