molecules 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/MIT-LICENSE CHANGED
@@ -1,6 +1,4 @@
1
1
  Copyright (c) 2006-2008, Regents of the University of Colorado.
2
- Developer:: Simon Chiang, Biomolecular Structure Program
3
- Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
2
 
5
3
  Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
4
  software and associated documentation files (the "Software"), to deal in the Software
data/README CHANGED
@@ -47,8 +47,8 @@ Molecules provides a mass calculator tap task. Tap[http://tap.rubyforge.org]
47
47
  is not required by molecules in general, but you get this bonus if you have tap
48
48
  installed:
49
49
 
50
- % tap -- molecules/calc ":RPPGFSPFR + H2O"
51
- I[15:34:30] 1077.57 Da :RPPGFSPFR + H2O
50
+ % tap run -- molecules/calc H2O
51
+ I[17:08:00] 18.0105646863 Da H2O
52
52
 
53
53
  == Known Issues
54
54
 
@@ -10,101 +10,40 @@ Unit.setup
10
10
 
11
11
  module Molecules
12
12
 
13
- # :startdoc::manifest a mass calculator
14
- # Calculates the mass of a molecule or empirical formula. The
15
- # options can be used to alter the output (precision, mass
16
- # calculation method etc.) You may enter compound formulae, or
17
- # a list of formulae. In addition, polypeptides can be specified
18
- # using the one-letter residue codes:
13
+ # :startdoc::manifest a mass calculator
19
14
  #
20
- # % tap -- molecules/calc H2O
21
- # I[17:09:00] 18.0105646863 Da H2O
22
- #
23
- # % tap -- molecules/calc H2O -u kg
24
- # I[13:35:59] 2.99072e-026 kg H2O
15
+ # Calculates the mass of a molecule. Compound formulae are allowed and you may
16
+ # specify a list of formulae. The options can be used to alter the output (precision,
17
+ # mass calculation method etc.)
25
18
  #
26
- # % tap -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
27
- # I[17:08:21] 89.05 Da C3H5NO + H2O
28
- # I[17:08:21] 1059.56 Da C50H73N15O11
29
- #
30
- # % tap -- molecules/calc :RPPGFSPFR
31
- # I[13:35:02] 1059.56 Da :RPPGFSPFR
32
- #
33
- # Furthermore, if a unimod path is specified in the configurations,
34
- # unimod modifications may be specified by name as the polypeptide
35
- # termini. Use '%' signs as in a SQL query to shorten the name:
36
- #
37
- # % tap -- molecules/calc 'Acetyl:RPPGFSPFR:Hydroxyl%' --unimod-path <...>
38
- # I[13:33:25] 1059.56 Da Acetyl:RPPGFSPFR:Hydroxyl%
19
+ # % tap run -- molecules/calc H2O
20
+ # I[17:08:00] 18.0105646863 Da H2O
39
21
  #
40
- # The unimod path must point to an sqlite3 ActiveUnimod database, and
41
- # sqlite3-ruby must be installed for this feature to work.
42
- #
43
- # * ActiveUnimod[http://bioactive.rubyforge.org/]
44
- # * sqlite3-ruby[http://rubyforge.org/projects/sqlite-ruby/]
45
- #
22
+ # % tap run -- molecules/calc H2O --units yg --precision 6
23
+ # I[17:08:21] 29.907243 yg H2O
24
+ #
25
+ # % tap run -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
26
+ # I[17:08:53] 89.05 Da C3H5NO + H2O
27
+ # I[17:08:53] 1059.56 Da C50H73N15O11
28
+ #
46
29
  class Calc < Tap::Task
47
30
 
48
31
  config :type, :monoisotopic # the mass type calculated
49
32
  config :precision, nil, :short => 'p' # the precision of the mass
50
33
  config :units, "Da", :short => 'u', &c.string # the mass unit reported
51
34
  config :composition, false, :short => 'c', &c.flag # reports the composition, not the formula
52
- config :unimod_path, nil do |path| # the path to the unimod database
53
- case
54
- when path == nil then nil
55
- when File.exists?(path) then path
56
- else raise "path to unimod db does not exist: #{path}"
57
- end
58
- end
59
35
 
60
- # Formulates a query for a modification matching code_name
61
- # for the unimod database. If the code_name contains a '%'
62
- # then the query will use a LIKE syntax, otherwise the
63
- # code_name will be searched for exactly.
64
- def mod_query(code_name)
65
- # should do a rails-like escape on code_name
66
- "SELECT code_name, composition FROM modifications WHERE code_name #{code_name.include?('%') ? 'LIKE' : '='} '#{code_name}'"
36
+ # Parses the formula string into an EmpiricalFormula.
37
+ # Can be used as a hook for more complicated formulae
38
+ # in subclasses.
39
+ def parse(formula)
40
+ EmpiricalFormula.parse(formula)
67
41
  end
68
42
 
69
- # Attempts to find and instantiate an EmpiricalFormula for
70
- # a unimod modification matching code_name.
71
- def find_mod(code_name)
72
- raise "no unimod_path was specified" if unimod_path == nil
73
- require 'sqlite3' unless Object.const_defined?(:SQLite3)
74
-
75
- results = []
76
- db = SQLite3::Database.new(unimod_path)
77
- db.execute(mod_query(code_name)) do |row|
78
- results << row
79
- end
80
- db.close
81
-
82
- case results.length
83
- when 1 then EmpiricalFormula.parse_simple(results[0][1])
84
- when 0 then raise "could not find modification: #{code_name}"
85
- else raise "multiple modifications found for: #{code_name} (#{results.collect {|result| result[0]}.join(', ')})"
86
- end
87
- end
88
-
89
- WATER = EmpiricalFormula.parse "H2O"
90
- HYDROGEN = EmpiricalFormula.parse "H"
91
- HYDROXIDE = EmpiricalFormula.parse "OH"
92
-
93
43
  # Returns an array of the calculated masses, in the correct unit.
94
44
  def process(*formulae)
95
- formulae.collect do |formula_str|
96
- formula = EmpiricalFormula.parse(formula_str) do |str|
97
- case str
98
- when /^(.*?):([A-Z]+):?(.*)$/
99
- peptide = Libraries::Polypeptide.new($2) + WATER
100
- peptide += find_mod($1) unless $1.to_s.empty?
101
- peptide += find_mod($3) unless $3.to_s.empty?
102
- peptide
103
- else nil
104
- end
105
- end
106
-
107
- mass = formula.mass do |element|
45
+ formulae.collect do |formula_str|
46
+ mass = parse(formula_str).mass do |element|
108
47
  case type
109
48
  when :monoisotopic then element.mass
110
49
  when :average then element.std_atomic_weight.value
@@ -116,8 +55,8 @@ module Molecules
116
55
  unless precision == nil
117
56
  mass = Unit.new( Utils.round(mass.scalar, precision), units)
118
57
  end
119
-
120
- log mass, composition ? formula : formula_str
58
+
59
+ log "#{mass.scalar} #{mass.units}", composition ? formula : formula_str
121
60
 
122
61
  mass
123
62
  end
@@ -39,7 +39,7 @@ module Molecules
39
39
  factor = nil
40
40
  composition = Hash.new(0)
41
41
  scanner = StringScanner.new(formula.reverse)
42
- while scanner.restsize > 0
42
+ while scanner.rest_size > 0
43
43
  case
44
44
  when scanner.scan_full(/\)(\d+-?)\(/, true, false)
45
45
  # found a factor
@@ -133,7 +133,7 @@ module Molecules
133
133
 
134
134
  # Parse elements and factors out of the formula from right to left
135
135
  scanner = StringScanner.new(formula.reverse)
136
- while scanner.restsize > 0
136
+ while scanner.rest_size > 0
137
137
 
138
138
  case
139
139
  when scanner.scan_full(/(\d+)/, true, false)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: molecules
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Chiang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-11 00:00:00 -06:00
12
+ date: 2008-10-13 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: 0.1.0
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tap
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.10.8
34
+ version:
25
35
  description:
26
36
  email: simon.a.chiang@gmail.com
27
37
  executables: []
@@ -32,9 +42,6 @@ extra_rdoc_files:
32
42
  - README
33
43
  - MIT-LICENSE
34
44
  files:
35
- - MIT-LICENSE
36
- - Rakefile
37
- - README
38
45
  - lib/molecules.rb
39
46
  - lib/molecules/calc.rb
40
47
  - lib/molecules/empirical_formula.rb
@@ -42,16 +49,8 @@ files:
42
49
  - lib/molecules/libraries/residue.rb
43
50
  - lib/molecules/utils.rb
44
51
  - tap.yml
45
- - test/molecules/calc_test.rb
46
- - test/molecules/empirical_formula_class_test.rb
47
- - test/molecules/empirical_formula_test.rb
48
- - test/molecules/libraries/polypeptide_test.rb
49
- - test/molecules/libraries/residue_test.rb
50
- - test/molecules/utils_test.rb
51
- - test/molecules_test.rb
52
- - test/molecules_test_helper.rb
53
- - test/molecules_test_suite.rb
54
- - test/tap_test_helper.rb
52
+ - README
53
+ - MIT-LICENSE
55
54
  has_rdoc: true
56
55
  homepage: http://bioactive.rubyforge.org/molecules/
57
56
  post_install_message:
@@ -78,5 +77,5 @@ rubygems_version: 1.2.0
78
77
  signing_key:
79
78
  specification_version: 2
80
79
  summary: A library of molecules for scientific calculations in Ruby.
81
- test_files:
82
- - test/molecules_test_suite.rb
80
+ test_files: []
81
+
data/Rakefile DELETED
@@ -1,78 +0,0 @@
1
- require 'rake'
2
- require 'rake/testtask'
3
- require 'rake/rdoctask'
4
- require 'rake/gempackagetask'
5
- require 'yaml'
6
-
7
- # tasks
8
- desc 'Default: Run tests.'
9
- task :default => :test
10
-
11
- desc 'Run tests.'
12
- Rake::TestTask.new(:test) do |t|
13
- t.libs << 'lib'
14
- t.pattern = File.join('test', ENV['subset'] || '', ENV['pattern'] || '**/*_test.rb')
15
- t.verbose = true
16
- end
17
-
18
- #
19
- # admin tasks
20
- #
21
-
22
- def gemspec
23
- data = File.read("molecules.gemspec")
24
- spec = nil
25
- Thread.new { spec = eval("$SAFE = 3\n#{data}") }.join
26
- spec
27
- end
28
-
29
- Rake::GemPackageTask.new(gemspec) do |pkg|
30
- pkg.need_tar = true
31
- end
32
-
33
- task :print_manifest do
34
- # collect files from the gemspec, labeling
35
- # with true or false corresponding to the
36
- # file existing or not
37
- files = gemspec.files.inject({}) do |files, file|
38
- files[File.expand_path(file)] = [File.exists?(file), file]
39
- files
40
- end
41
-
42
- # gather non-rdoc/pkg files for the project
43
- # and add to the files list if they are not
44
- # included already (marking by the absence
45
- # of a label)
46
- Dir.glob("**/*").each do |file|
47
- next if file =~ /^(rdoc|pkg)/ || File.directory?(file)
48
-
49
- path = File.expand_path(file)
50
- files[path] = ["", file] unless files.has_key?(path)
51
- end
52
-
53
- # sort and output the results
54
- files.values.sort_by {|exists, file| file }.each do |entry|
55
- puts "%-5s : %s" % entry
56
- end
57
- end
58
-
59
- desc 'Generate documentation.'
60
- Rake::RDocTask.new(:rdoc) do |rdoc|
61
- rdoc.rdoc_dir = 'rdoc'
62
- rdoc.title = "molecules"
63
- rdoc.options << '--line-numbers' << '--inline-source'
64
- rdoc.rdoc_files.include(["README", 'MIT-LICENSE'])
65
- rdoc.rdoc_files.include(gemspec.files.select {|file| file =~ /^lib/})
66
- end
67
-
68
- desc "Publish RDoc to RubyForge"
69
- task :publish_rdoc => [:rdoc] do
70
- config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
71
- host = "#{config["username"]}@rubyforge.org"
72
-
73
- rsync_args = "-v -c -r"
74
- remote_dir = "/var/www/gforge-projects/bioactive/molecules"
75
- local_dir = "rdoc"
76
-
77
- sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
78
- end
@@ -1,37 +0,0 @@
1
- require File.join(File.dirname(__FILE__), '../tap_test_helper.rb')
2
- require 'molecules/calc'
3
-
4
- class Molecules::CalcTest < Test::Unit::TestCase
5
- acts_as_tap_test
6
-
7
- attr_reader :t
8
-
9
- def setup
10
- super
11
- @t = Molecules::Calc.new
12
- end
13
-
14
- def test_mass_calculation
15
- t.enq("H2O")
16
- app.run
17
-
18
- assert_equal [[Unit.new(18.0105646863, "Da")]], app.results(t)
19
- end
20
-
21
- def test_mass_calculation_with_precision
22
- t.precision = 2
23
- t.enq("H2O", "NH3 + H2O")
24
- app.run
25
-
26
- assert_equal [[Unit.new(18.01, "Da"), Unit.new(35.04, "Da")]], app.results(t)
27
- end
28
-
29
- def test_mass_calculation_with_precision_and_unit_conversion
30
- t.units = "yg"
31
- t.precision = 3
32
- t.enq("H2O")
33
- app.run
34
-
35
- assert_equal [[Unit.new(29.907, "yg")]], app.results(t)
36
- end
37
- end
@@ -1,196 +0,0 @@
1
- require File.join(File.dirname(__FILE__), '../molecules_test_helper.rb')
2
- require 'molecules/empirical_formula'
3
-
4
- class EmpiricalFormulaClassTest < Test::Unit::TestCase
5
- include Molecules
6
-
7
- #
8
- # parse_simple test
9
- #
10
-
11
- def test_parse_simple_documentation
12
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("H(2)O").to_s
13
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("H (2) O").to_s
14
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("HO(-1)O(2)H").to_s
15
- end
16
-
17
- def test_parse_simple
18
- assert_equal([2,1], EmpiricalFormula.parse_simple("HO(-1)O(2)H").formula)
19
- assert_equal([2,1], EmpiricalFormula.parse_simple("H O (-1 )O( 2) H ").formula)
20
- end
21
-
22
- def test_parse_simple_fails_for_malformed_formulae
23
- [
24
- # numbers outside parenthesis
25
- "H2",
26
- # empty parenthesis
27
- "H()",
28
- # mismatched parenthesis
29
- "H(",
30
- ")H",
31
- # anything complex
32
- "H + O"
33
- ].each do |formula|
34
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse_simple(formula) }
35
- end
36
- end
37
-
38
- #
39
- # test class parse
40
- #
41
-
42
- def test_parse_documentation
43
- assert_equal "H(2)O", EmpiricalFormula.parse("H2O").to_s
44
- assert_equal "C(52)H(106)", EmpiricalFormula.parse("CH3(CH2)50CH3").to_s
45
- assert_equal "C(2)H(4)N(2)", EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s
46
-
47
- block = lambda do |formula|
48
- case formula
49
- when /\[(.*)\]/
50
- factors = $1.split(/,/).collect {|i| i.strip.to_i }
51
- EmpiricalFormula.new(factors)
52
- else nil
53
- end
54
- end
55
-
56
- assert_equal "H(4)O(2)", EmpiricalFormula.parse("H2O + [2, 1]", &block).to_s
57
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse("H2O + :not_expected", &block) }
58
- end
59
-
60
- def test_parse
61
- {
62
- nil => "",
63
- "" => "",
64
- "H" => "H",
65
- "HO" => "HO",
66
- "HFe" => "FeH",
67
- "FeH" => "FeH",
68
- "OH2" => "H(2)O",
69
- "H2O" => "H(2)O",
70
- "C6H12O4" => "C(6)H(12)O(4)",
71
- "Fe2OMg3" => "Fe(2)Mg(3)O",
72
- "(H)2" => "H(2)",
73
- "(OH)2" => "H(2)O(2)",
74
- "(HFe)" => "FeH",
75
- "(FeH)" => "FeH",
76
- "(OH2)2" => "H(4)O(2)",
77
- "(H2O)2" => "H(4)O(2)",
78
- "(C6H12O4)2" => "C(12)H(24)O(8)",
79
- "(Fe2OMg3)2" => "Fe(4)Mg(6)O(2)",
80
- "C6H12O4(C6H12O4)2C6H12O4" => "C(24)H(48)O(16)",
81
- "Fe2OMg3(Fe2OMg3(Fe2OMg3))Fe2OMg3" => "Fe(8)Mg(12)O(4)",
82
- "Fe2OMg3(Fe2OMg3)(Fe2OMg3)Fe2OMg3" => "Fe(8)Mg(12)O(4)",
83
- "Fe2OMg3(Fe2OMg3(Fe2OMg3)3((C)6H12O4)2)2C" => "C(25)Fe(18)H(48)Mg(27)O(25)",
84
- " (H2O) 10 0 " => "H(200)O(100)",
85
- "CH3(CH2)7CH" => "C(9)H(18)",
86
- "H3NCHCO2" => "C(2)H(4)NO(2)",
87
- "(CH3)2CuLi" => "C(2)CuH(6)Li",
88
-
89
- # multipart
90
- "-H" => "H(-1)",
91
- "H2O-H" => "HO",
92
- "H2O - (OH)2+ H2O2-H2O" => ""
93
- }.each_pair do |formula, composition_str|
94
- m = EmpiricalFormula.parse(formula)
95
- assert_equal composition_str, m.to_s, formula
96
- end
97
- end
98
-
99
- def test_parse_fails_for_malformed_formulae
100
- [
101
- # mismatched parenthesis
102
- "H)2",
103
- "(H2",
104
- "(O2(H2)",
105
- "(O)2H2)",
106
- # hanging factors
107
- "2C",
108
- #"(2)",
109
- "(2)2",
110
- "(2C)",
111
- "(2C)2",
112
- "C(2C)",
113
- # empty parenthesis
114
- "()",
115
- "()2"
116
- ].each do |formula|
117
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse(formula) }
118
- end
119
- end
120
-
121
- #
122
- # class mass test
123
- #
124
-
125
- def break_test_class_mass_method
126
- water_mass = EmpiricalFormula::Element::H.mass * 2 + EmpiricalFormula::Element::O.mass
127
- assert_equal 18.010565, water_mass
128
-
129
- assert_equal 18.010565, EmpiricalFormula.mass("H2O")
130
- assert_equal 18.010565, EmpiricalFormula.mass("H + OH")
131
- assert_equal 18, EmpiricalFormula.mass("H2O", 0)
132
- end
133
-
134
- #
135
- # library molecules
136
- #
137
-
138
- def break_test_access_library_molecules
139
- water = EmpiricalFormula::H2O
140
-
141
- assert_equal water, EmpiricalFormula.lookup('h2o')
142
- assert_equal water, EmpiricalFormula.h2o
143
- assert_equal 18.010565, EmpiricalFormula.h2o.mass
144
- end
145
-
146
- # vs the VG Analytical Organic Mass Spectrometry reference, reference date unknown (prior to 2005)
147
- # the data from the data sheet was copied manually to doc/VG Analytical DataSheet.txt
148
- def test_molecule_mass_values_vs_vg_analytical
149
- str = %Q{
150
- NH2 16.01872 16.0226
151
- OH 17.00274 17.0073
152
- OCH3 31.01839 31.0342
153
- CH3CO 43.01839 43.0452}
154
-
155
- molecules = str.split(/\n/)
156
- molecules.each do |mol_str|
157
- next if mol_str.empty?
158
-
159
- name, monoisotopic, average = mol_str.split(/\s/)
160
- monoisotopic = monoisotopic.to_f
161
- average = average.to_f
162
-
163
- molecule = EmpiricalFormula.parse(name)
164
- assert_in_delta monoisotopic, molecule.mass, delta_mass, mol_str
165
- # TODO -- check average mass
166
- end
167
- end
168
-
169
- #
170
- # benchmark
171
- #
172
-
173
- def test_parse_speed
174
- benchmark_test(20) do |x|
175
- n = 10
176
-
177
- ["H20","H2(H2(H2))H2"].each do |formula|
178
- x.report("#{n}k #{formula}") do
179
- (n*1000).times { EmpiricalFormula.parse(formula) }
180
- end
181
- end
182
- end
183
- end
184
-
185
- def test_parse_simple_speed
186
- benchmark_test(20) do |x|
187
- n = 10
188
-
189
- ["H(20)","H(2)H(2)H(2)H(2)"].each do |formula|
190
- x.report("#{n}k #{formula}") do
191
- (n*1000).times { EmpiricalFormula.parse_simple(formula) }
192
- end
193
- end
194
- end
195
- end
196
- end