molecules 0.1.0 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/MIT-LICENSE CHANGED
@@ -1,6 +1,4 @@
1
1
  Copyright (c) 2006-2008, Regents of the University of Colorado.
2
- Developer:: Simon Chiang, Biomolecular Structure Program
3
- Support:: CU Denver School of Medicine Deans Academic Enrichment Fund
4
2
 
5
3
  Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
4
  software and associated documentation files (the "Software"), to deal in the Software
data/README CHANGED
@@ -47,8 +47,8 @@ Molecules provides a mass calculator tap task. Tap[http://tap.rubyforge.org]
47
47
  is not required by molecules in general, but you get this bonus if you have tap
48
48
  installed:
49
49
 
50
- % tap -- molecules/calc ":RPPGFSPFR + H2O"
51
- I[15:34:30] 1077.57 Da :RPPGFSPFR + H2O
50
+ % tap run -- molecules/calc H2O
51
+ I[17:08:00] 18.0105646863 Da H2O
52
52
 
53
53
  == Known Issues
54
54
 
@@ -10,101 +10,40 @@ Unit.setup
10
10
 
11
11
  module Molecules
12
12
 
13
- # :startdoc::manifest a mass calculator
14
- # Calculates the mass of a molecule or empirical formula. The
15
- # options can be used to alter the output (precision, mass
16
- # calculation method etc.) You may enter compound formulae, or
17
- # a list of formulae. In addition, polypeptides can be specified
18
- # using the one-letter residue codes:
13
+ # :startdoc::manifest a mass calculator
19
14
  #
20
- # % tap -- molecules/calc H2O
21
- # I[17:09:00] 18.0105646863 Da H2O
22
- #
23
- # % tap -- molecules/calc H2O -u kg
24
- # I[13:35:59] 2.99072e-026 kg H2O
15
+ # Calculates the mass of a molecule. Compound formulae are allowed and you may
16
+ # specify a list of formulae. The options can be used to alter the output (precision,
17
+ # mass calculation method etc.)
25
18
  #
26
- # % tap -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
27
- # I[17:08:21] 89.05 Da C3H5NO + H2O
28
- # I[17:08:21] 1059.56 Da C50H73N15O11
29
- #
30
- # % tap -- molecules/calc :RPPGFSPFR
31
- # I[13:35:02] 1059.56 Da :RPPGFSPFR
32
- #
33
- # Furthermore, if a unimod path is specified in the configurations,
34
- # unimod modifications may be specified by name as the polypeptide
35
- # termini. Use '%' signs as in a SQL query to shorten the name:
36
- #
37
- # % tap -- molecules/calc 'Acetyl:RPPGFSPFR:Hydroxyl%' --unimod-path <...>
38
- # I[13:33:25] 1059.56 Da Acetyl:RPPGFSPFR:Hydroxyl%
19
+ # % tap run -- molecules/calc H2O
20
+ # I[17:08:00] 18.0105646863 Da H2O
39
21
  #
40
- # The unimod path must point to an sqlite3 ActiveUnimod database, and
41
- # sqlite3-ruby must be installed for this feature to work.
42
- #
43
- # * ActiveUnimod[http://bioactive.rubyforge.org/]
44
- # * sqlite3-ruby[http://rubyforge.org/projects/sqlite-ruby/]
45
- #
22
+ # % tap run -- molecules/calc H2O --units yg --precision 6
23
+ # I[17:08:21] 29.907243 yg H2O
24
+ #
25
+ # % tap run -- molecules/calc 'C3H5NO + H2O' C50H73N15O11 -p 2
26
+ # I[17:08:53] 89.05 Da C3H5NO + H2O
27
+ # I[17:08:53] 1059.56 Da C50H73N15O11
28
+ #
46
29
  class Calc < Tap::Task
47
30
 
48
31
  config :type, :monoisotopic # the mass type calculated
49
32
  config :precision, nil, :short => 'p' # the precision of the mass
50
33
  config :units, "Da", :short => 'u', &c.string # the mass unit reported
51
34
  config :composition, false, :short => 'c', &c.flag # reports the composition, not the formula
52
- config :unimod_path, nil do |path| # the path to the unimod database
53
- case
54
- when path == nil then nil
55
- when File.exists?(path) then path
56
- else raise "path to unimod db does not exist: #{path}"
57
- end
58
- end
59
35
 
60
- # Formulates a query for a modification matching code_name
61
- # for the unimod database. If the code_name contains a '%'
62
- # then the query will use a LIKE syntax, otherwise the
63
- # code_name will be searched for exactly.
64
- def mod_query(code_name)
65
- # should do a rails-like escape on code_name
66
- "SELECT code_name, composition FROM modifications WHERE code_name #{code_name.include?('%') ? 'LIKE' : '='} '#{code_name}'"
36
+ # Parses the formula string into an EmpiricalFormula.
37
+ # Can be used as a hook for more complicated formulae
38
+ # in subclasses.
39
+ def parse(formula)
40
+ EmpiricalFormula.parse(formula)
67
41
  end
68
42
 
69
- # Attempts to find and instantiate an EmpiricalFormula for
70
- # a unimod modification matching code_name.
71
- def find_mod(code_name)
72
- raise "no unimod_path was specified" if unimod_path == nil
73
- require 'sqlite3' unless Object.const_defined?(:SQLite3)
74
-
75
- results = []
76
- db = SQLite3::Database.new(unimod_path)
77
- db.execute(mod_query(code_name)) do |row|
78
- results << row
79
- end
80
- db.close
81
-
82
- case results.length
83
- when 1 then EmpiricalFormula.parse_simple(results[0][1])
84
- when 0 then raise "could not find modification: #{code_name}"
85
- else raise "multiple modifications found for: #{code_name} (#{results.collect {|result| result[0]}.join(', ')})"
86
- end
87
- end
88
-
89
- WATER = EmpiricalFormula.parse "H2O"
90
- HYDROGEN = EmpiricalFormula.parse "H"
91
- HYDROXIDE = EmpiricalFormula.parse "OH"
92
-
93
43
  # Returns an array of the calculated masses, in the correct unit.
94
44
  def process(*formulae)
95
- formulae.collect do |formula_str|
96
- formula = EmpiricalFormula.parse(formula_str) do |str|
97
- case str
98
- when /^(.*?):([A-Z]+):?(.*)$/
99
- peptide = Libraries::Polypeptide.new($2) + WATER
100
- peptide += find_mod($1) unless $1.to_s.empty?
101
- peptide += find_mod($3) unless $3.to_s.empty?
102
- peptide
103
- else nil
104
- end
105
- end
106
-
107
- mass = formula.mass do |element|
45
+ formulae.collect do |formula_str|
46
+ mass = parse(formula_str).mass do |element|
108
47
  case type
109
48
  when :monoisotopic then element.mass
110
49
  when :average then element.std_atomic_weight.value
@@ -116,8 +55,8 @@ module Molecules
116
55
  unless precision == nil
117
56
  mass = Unit.new( Utils.round(mass.scalar, precision), units)
118
57
  end
119
-
120
- log mass, composition ? formula : formula_str
58
+
59
+ log "#{mass.scalar} #{mass.units}", composition ? formula : formula_str
121
60
 
122
61
  mass
123
62
  end
@@ -39,7 +39,7 @@ module Molecules
39
39
  factor = nil
40
40
  composition = Hash.new(0)
41
41
  scanner = StringScanner.new(formula.reverse)
42
- while scanner.restsize > 0
42
+ while scanner.rest_size > 0
43
43
  case
44
44
  when scanner.scan_full(/\)(\d+-?)\(/, true, false)
45
45
  # found a factor
@@ -133,7 +133,7 @@ module Molecules
133
133
 
134
134
  # Parse elements and factors out of the formula from right to left
135
135
  scanner = StringScanner.new(formula.reverse)
136
- while scanner.restsize > 0
136
+ while scanner.rest_size > 0
137
137
 
138
138
  case
139
139
  when scanner.scan_full(/(\d+)/, true, false)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: molecules
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Chiang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-11 00:00:00 -06:00
12
+ date: 2008-10-13 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: 0.1.0
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tap
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.10.8
34
+ version:
25
35
  description:
26
36
  email: simon.a.chiang@gmail.com
27
37
  executables: []
@@ -32,9 +42,6 @@ extra_rdoc_files:
32
42
  - README
33
43
  - MIT-LICENSE
34
44
  files:
35
- - MIT-LICENSE
36
- - Rakefile
37
- - README
38
45
  - lib/molecules.rb
39
46
  - lib/molecules/calc.rb
40
47
  - lib/molecules/empirical_formula.rb
@@ -42,16 +49,8 @@ files:
42
49
  - lib/molecules/libraries/residue.rb
43
50
  - lib/molecules/utils.rb
44
51
  - tap.yml
45
- - test/molecules/calc_test.rb
46
- - test/molecules/empirical_formula_class_test.rb
47
- - test/molecules/empirical_formula_test.rb
48
- - test/molecules/libraries/polypeptide_test.rb
49
- - test/molecules/libraries/residue_test.rb
50
- - test/molecules/utils_test.rb
51
- - test/molecules_test.rb
52
- - test/molecules_test_helper.rb
53
- - test/molecules_test_suite.rb
54
- - test/tap_test_helper.rb
52
+ - README
53
+ - MIT-LICENSE
55
54
  has_rdoc: true
56
55
  homepage: http://bioactive.rubyforge.org/molecules/
57
56
  post_install_message:
@@ -78,5 +77,5 @@ rubygems_version: 1.2.0
78
77
  signing_key:
79
78
  specification_version: 2
80
79
  summary: A library of molecules for scientific calculations in Ruby.
81
- test_files:
82
- - test/molecules_test_suite.rb
80
+ test_files: []
81
+
data/Rakefile DELETED
@@ -1,78 +0,0 @@
1
- require 'rake'
2
- require 'rake/testtask'
3
- require 'rake/rdoctask'
4
- require 'rake/gempackagetask'
5
- require 'yaml'
6
-
7
- # tasks
8
- desc 'Default: Run tests.'
9
- task :default => :test
10
-
11
- desc 'Run tests.'
12
- Rake::TestTask.new(:test) do |t|
13
- t.libs << 'lib'
14
- t.pattern = File.join('test', ENV['subset'] || '', ENV['pattern'] || '**/*_test.rb')
15
- t.verbose = true
16
- end
17
-
18
- #
19
- # admin tasks
20
- #
21
-
22
- def gemspec
23
- data = File.read("molecules.gemspec")
24
- spec = nil
25
- Thread.new { spec = eval("$SAFE = 3\n#{data}") }.join
26
- spec
27
- end
28
-
29
- Rake::GemPackageTask.new(gemspec) do |pkg|
30
- pkg.need_tar = true
31
- end
32
-
33
- task :print_manifest do
34
- # collect files from the gemspec, labeling
35
- # with true or false corresponding to the
36
- # file existing or not
37
- files = gemspec.files.inject({}) do |files, file|
38
- files[File.expand_path(file)] = [File.exists?(file), file]
39
- files
40
- end
41
-
42
- # gather non-rdoc/pkg files for the project
43
- # and add to the files list if they are not
44
- # included already (marking by the absence
45
- # of a label)
46
- Dir.glob("**/*").each do |file|
47
- next if file =~ /^(rdoc|pkg)/ || File.directory?(file)
48
-
49
- path = File.expand_path(file)
50
- files[path] = ["", file] unless files.has_key?(path)
51
- end
52
-
53
- # sort and output the results
54
- files.values.sort_by {|exists, file| file }.each do |entry|
55
- puts "%-5s : %s" % entry
56
- end
57
- end
58
-
59
- desc 'Generate documentation.'
60
- Rake::RDocTask.new(:rdoc) do |rdoc|
61
- rdoc.rdoc_dir = 'rdoc'
62
- rdoc.title = "molecules"
63
- rdoc.options << '--line-numbers' << '--inline-source'
64
- rdoc.rdoc_files.include(["README", 'MIT-LICENSE'])
65
- rdoc.rdoc_files.include(gemspec.files.select {|file| file =~ /^lib/})
66
- end
67
-
68
- desc "Publish RDoc to RubyForge"
69
- task :publish_rdoc => [:rdoc] do
70
- config = YAML.load(File.read(File.expand_path("~/.rubyforge/user-config.yml")))
71
- host = "#{config["username"]}@rubyforge.org"
72
-
73
- rsync_args = "-v -c -r"
74
- remote_dir = "/var/www/gforge-projects/bioactive/molecules"
75
- local_dir = "rdoc"
76
-
77
- sh %{rsync #{rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
78
- end
@@ -1,37 +0,0 @@
1
- require File.join(File.dirname(__FILE__), '../tap_test_helper.rb')
2
- require 'molecules/calc'
3
-
4
- class Molecules::CalcTest < Test::Unit::TestCase
5
- acts_as_tap_test
6
-
7
- attr_reader :t
8
-
9
- def setup
10
- super
11
- @t = Molecules::Calc.new
12
- end
13
-
14
- def test_mass_calculation
15
- t.enq("H2O")
16
- app.run
17
-
18
- assert_equal [[Unit.new(18.0105646863, "Da")]], app.results(t)
19
- end
20
-
21
- def test_mass_calculation_with_precision
22
- t.precision = 2
23
- t.enq("H2O", "NH3 + H2O")
24
- app.run
25
-
26
- assert_equal [[Unit.new(18.01, "Da"), Unit.new(35.04, "Da")]], app.results(t)
27
- end
28
-
29
- def test_mass_calculation_with_precision_and_unit_conversion
30
- t.units = "yg"
31
- t.precision = 3
32
- t.enq("H2O")
33
- app.run
34
-
35
- assert_equal [[Unit.new(29.907, "yg")]], app.results(t)
36
- end
37
- end
@@ -1,196 +0,0 @@
1
- require File.join(File.dirname(__FILE__), '../molecules_test_helper.rb')
2
- require 'molecules/empirical_formula'
3
-
4
- class EmpiricalFormulaClassTest < Test::Unit::TestCase
5
- include Molecules
6
-
7
- #
8
- # parse_simple test
9
- #
10
-
11
- def test_parse_simple_documentation
12
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("H(2)O").to_s
13
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("H (2) O").to_s
14
- assert_equal "H(2)O", EmpiricalFormula.parse_simple("HO(-1)O(2)H").to_s
15
- end
16
-
17
- def test_parse_simple
18
- assert_equal([2,1], EmpiricalFormula.parse_simple("HO(-1)O(2)H").formula)
19
- assert_equal([2,1], EmpiricalFormula.parse_simple("H O (-1 )O( 2) H ").formula)
20
- end
21
-
22
- def test_parse_simple_fails_for_malformed_formulae
23
- [
24
- # numbers outside parenthesis
25
- "H2",
26
- # empty parenthesis
27
- "H()",
28
- # mismatched parenthesis
29
- "H(",
30
- ")H",
31
- # anything complex
32
- "H + O"
33
- ].each do |formula|
34
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse_simple(formula) }
35
- end
36
- end
37
-
38
- #
39
- # test class parse
40
- #
41
-
42
- def test_parse_documentation
43
- assert_equal "H(2)O", EmpiricalFormula.parse("H2O").to_s
44
- assert_equal "C(52)H(106)", EmpiricalFormula.parse("CH3(CH2)50CH3").to_s
45
- assert_equal "C(2)H(4)N(2)", EmpiricalFormula.parse("C2H3NO - H2O + NH3").to_s
46
-
47
- block = lambda do |formula|
48
- case formula
49
- when /\[(.*)\]/
50
- factors = $1.split(/,/).collect {|i| i.strip.to_i }
51
- EmpiricalFormula.new(factors)
52
- else nil
53
- end
54
- end
55
-
56
- assert_equal "H(4)O(2)", EmpiricalFormula.parse("H2O + [2, 1]", &block).to_s
57
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse("H2O + :not_expected", &block) }
58
- end
59
-
60
- def test_parse
61
- {
62
- nil => "",
63
- "" => "",
64
- "H" => "H",
65
- "HO" => "HO",
66
- "HFe" => "FeH",
67
- "FeH" => "FeH",
68
- "OH2" => "H(2)O",
69
- "H2O" => "H(2)O",
70
- "C6H12O4" => "C(6)H(12)O(4)",
71
- "Fe2OMg3" => "Fe(2)Mg(3)O",
72
- "(H)2" => "H(2)",
73
- "(OH)2" => "H(2)O(2)",
74
- "(HFe)" => "FeH",
75
- "(FeH)" => "FeH",
76
- "(OH2)2" => "H(4)O(2)",
77
- "(H2O)2" => "H(4)O(2)",
78
- "(C6H12O4)2" => "C(12)H(24)O(8)",
79
- "(Fe2OMg3)2" => "Fe(4)Mg(6)O(2)",
80
- "C6H12O4(C6H12O4)2C6H12O4" => "C(24)H(48)O(16)",
81
- "Fe2OMg3(Fe2OMg3(Fe2OMg3))Fe2OMg3" => "Fe(8)Mg(12)O(4)",
82
- "Fe2OMg3(Fe2OMg3)(Fe2OMg3)Fe2OMg3" => "Fe(8)Mg(12)O(4)",
83
- "Fe2OMg3(Fe2OMg3(Fe2OMg3)3((C)6H12O4)2)2C" => "C(25)Fe(18)H(48)Mg(27)O(25)",
84
- " (H2O) 10 0 " => "H(200)O(100)",
85
- "CH3(CH2)7CH" => "C(9)H(18)",
86
- "H3NCHCO2" => "C(2)H(4)NO(2)",
87
- "(CH3)2CuLi" => "C(2)CuH(6)Li",
88
-
89
- # multipart
90
- "-H" => "H(-1)",
91
- "H2O-H" => "HO",
92
- "H2O - (OH)2+ H2O2-H2O" => ""
93
- }.each_pair do |formula, composition_str|
94
- m = EmpiricalFormula.parse(formula)
95
- assert_equal composition_str, m.to_s, formula
96
- end
97
- end
98
-
99
- def test_parse_fails_for_malformed_formulae
100
- [
101
- # mismatched parenthesis
102
- "H)2",
103
- "(H2",
104
- "(O2(H2)",
105
- "(O)2H2)",
106
- # hanging factors
107
- "2C",
108
- #"(2)",
109
- "(2)2",
110
- "(2C)",
111
- "(2C)2",
112
- "C(2C)",
113
- # empty parenthesis
114
- "()",
115
- "()2"
116
- ].each do |formula|
117
- assert_raise(EmpiricalFormula::ParseError) { EmpiricalFormula.parse(formula) }
118
- end
119
- end
120
-
121
- #
122
- # class mass test
123
- #
124
-
125
- def break_test_class_mass_method
126
- water_mass = EmpiricalFormula::Element::H.mass * 2 + EmpiricalFormula::Element::O.mass
127
- assert_equal 18.010565, water_mass
128
-
129
- assert_equal 18.010565, EmpiricalFormula.mass("H2O")
130
- assert_equal 18.010565, EmpiricalFormula.mass("H + OH")
131
- assert_equal 18, EmpiricalFormula.mass("H2O", 0)
132
- end
133
-
134
- #
135
- # library molecules
136
- #
137
-
138
- def break_test_access_library_molecules
139
- water = EmpiricalFormula::H2O
140
-
141
- assert_equal water, EmpiricalFormula.lookup('h2o')
142
- assert_equal water, EmpiricalFormula.h2o
143
- assert_equal 18.010565, EmpiricalFormula.h2o.mass
144
- end
145
-
146
- # vs the VG Analytical Organic Mass Spectrometry reference, reference date unknown (prior to 2005)
147
- # the data from the data sheet was copied manually to doc/VG Analytical DataSheet.txt
148
- def test_molecule_mass_values_vs_vg_analytical
149
- str = %Q{
150
- NH2 16.01872 16.0226
151
- OH 17.00274 17.0073
152
- OCH3 31.01839 31.0342
153
- CH3CO 43.01839 43.0452}
154
-
155
- molecules = str.split(/\n/)
156
- molecules.each do |mol_str|
157
- next if mol_str.empty?
158
-
159
- name, monoisotopic, average = mol_str.split(/\s/)
160
- monoisotopic = monoisotopic.to_f
161
- average = average.to_f
162
-
163
- molecule = EmpiricalFormula.parse(name)
164
- assert_in_delta monoisotopic, molecule.mass, delta_mass, mol_str
165
- # TODO -- check average mass
166
- end
167
- end
168
-
169
- #
170
- # benchmark
171
- #
172
-
173
- def test_parse_speed
174
- benchmark_test(20) do |x|
175
- n = 10
176
-
177
- ["H20","H2(H2(H2))H2"].each do |formula|
178
- x.report("#{n}k #{formula}") do
179
- (n*1000).times { EmpiricalFormula.parse(formula) }
180
- end
181
- end
182
- end
183
- end
184
-
185
- def test_parse_simple_speed
186
- benchmark_test(20) do |x|
187
- n = 10
188
-
189
- ["H(20)","H(2)H(2)H(2)H(2)"].each do |formula|
190
- x.report("#{n}k #{formula}") do
191
- (n*1000).times { EmpiricalFormula.parse_simple(formula) }
192
- end
193
- end
194
- end
195
- end
196
- end