chemruby 0.9.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127):
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
@@ -21,14 +21,16 @@ module Chem
21
21
  # Draws line
22
22
  # This method may be invoked from chem/db/vector.rb
23
23
  def line(from, to, color)
24
- @canvas.stroke("rgb(%f, %f, %f)" % color)
24
+ @canvas.stroke("rgb(%f, %f, %f)" % to_256(color))
25
25
  @canvas.line(from[0], from[1], to[0], to[1])
26
26
  end
27
27
 
28
28
  def fill(nodes, color, params = {})
29
- @canvas.fill("rgb(%f, %f, %f)" % params[:color]) if params[:color]
29
+ @canvas.stroke("rgb(%f, %f, %f)" % to_256(color))
30
+ @canvas.fill("rgb(%f, %f, %f)" % to_256(color)) if color
30
31
  path = nodes.inject([]){|ret, node| ret << node[0] ; ret << node[1]}
31
32
  @canvas.polygon(* path)
33
+ @canvas.fill("black")
32
34
  end
33
35
 
34
36
  def text(str, x, y, params = {})
@@ -36,7 +38,7 @@ module Chem
36
38
  metrics = @canvas.get_type_metrics(@img, str)
37
39
  @canvas.stroke('transparent')
38
40
  @canvas.pointsize(params[:pontsize]) if params[:pointsize]
39
- @canvas.fill("rgb(%f, %f, %f)" % params[:color]) if params[:color]
41
+ @canvas.fill("rgb(%f, %f, %f)" % to_256(params[:color])) if params[:color]
40
42
 
41
43
  @canvas.text(x - metrics.width / 2.0,
42
44
  y + metrics.height / 4.0,
@@ -4,6 +4,10 @@ module Chem
4
4
 
5
5
  module MDL
6
6
 
7
+ class MdlMolecule
8
+ attr_accessor :sdf_data
9
+ end
10
+
7
11
  class SdfParser
8
12
  include Enumerable
9
13
 
@@ -19,10 +23,32 @@ module Chem
19
23
  first_entry = true
20
24
  from = 0
21
25
  @input.each("$$$$") do |entry|
22
- from = entry.index("\n") + 1unless first_entry
26
+ from = entry.index("\n") + 1 unless first_entry
23
27
  first_entry = false
24
28
  next if entry[from..-1].length < 3
25
- yield MdlMolecule.parse_io(StringIO.new(entry[from..-1]))
29
+ molio = StringIO.new(entry[from..-1])
30
+ mol = MdlMolecule.parse_io(molio)
31
+ mol.sdf_data = {}
32
+ data_header = nil
33
+
34
+ molio.each do |line|
35
+ if line[0..0] == ">"
36
+ if /<([^>]+)>/.match(line)
37
+ data_header = $1
38
+ elsif /(DT\d+)/.match(line)
39
+ data_header = $1
40
+ end
41
+ mol.sdf_data[data_header] = []
42
+ elsif /^$/.match(line)
43
+ if mol.sdf_data[data_header].respond_to?(:join)
44
+ mol.sdf_data[data_header] = mol.sdf_data[data_header].join("\n")
45
+ end
46
+ # end of data
47
+ else
48
+ mol.sdf_data[data_header] << line.chop
49
+ end
50
+ end
51
+ yield mol
26
52
  end
27
53
 
28
54
  end
@@ -93,16 +93,15 @@ def construct mol, tree, prev = nil
93
93
  else
94
94
  mol.nodes.push(node)
95
95
  mol.join(prev, node) if prev
96
- # if node.pos != nil
97
96
  if node.smiles_pos
98
- node.smiles_pos.each do |smiles_pos|
99
- if @ring[smiles_pos]
100
- mol.join(node, @ring[smiles_pos])
97
+ node.smiles_pos.each do |sp|
98
+ if @ring[sp]
99
+ mol.join(node, @ring[sp])
101
100
  # re-use ring closure digits
102
101
  # Higher-numbered ring closures are not supported yet.
103
- @ring[smiles_pos] = nil
102
+ @ring[sp] = nil
104
103
  else
105
- @ring[smiles_pos] = node
104
+ @ring[sp] = node
106
105
  end
107
106
  end
108
107
  end
@@ -116,13 +115,15 @@ def initialize
116
115
  @ring = []
117
116
  end
118
117
 
119
- @@parser = SmilesParser.new
120
-
121
118
  def self.parse_smiles( smiles )
122
- @@parser.parse( smiles )
119
+ SmilesParser.new.parse( smiles)
123
120
  end
124
121
 
125
- ElementRegex = Regexp.new('\A(' + Chem::Number2Element.inject([]){|ret, el| ret.push(el.to_s)}.sort.reverse.join("|") + ')', Regexp::IGNORECASE)
122
+ ElementRegex = Regexp.new('\A(Cl|Br|B|C|N|O|P|S|F|I|H)', Regexp::IGNORECASE)
123
+ InorganicRegex = /\A\[([^\]]+)\]/
124
+ NumberRegex = /\A\d+/
125
+ BackSlashRegex = /\\/
126
+ OtherRegex = /\A./
126
127
 
127
128
  def parse( line )
128
129
  @q = []
@@ -130,18 +131,18 @@ def parse( line )
130
131
  line.strip!
131
132
  until line.empty? do
132
133
  case line
133
- when /\A\[([^\]]+)\]/
134
+ when InorganicRegex
134
135
  # [nH] : aromatic N-H
135
136
  @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
136
137
  when ElementRegex
137
138
  prop = {}
138
139
  prop[:is_aromatic] = (97 <= $&[0]) #and 122 < $&[0]
139
140
  @q.push [:SYMBOL, [$&.capitalize.intern , prop]]
140
- when /\A\d+/
141
+ when NumberRegex
141
142
  @q.push [:NUMBER, $& ]
142
- when /\\/
143
+ when BackSlashRegex
143
144
  @q.push [:BSLASH, :BSLASH]
144
- when /\A./
145
+ when OtherRegex
145
146
  @q.push [$&, $&]
146
147
  else
147
148
  raise RuntimeError, 'must not happen'
@@ -149,7 +150,6 @@ def parse( line )
149
150
  line = $'
150
151
  end
151
152
  @q.push [ :EOL, nil ]
152
- # p @q
153
153
  mol = Chem::SmilesMol.new
154
154
  construct(mol, do_parse)
155
155
  mol
@@ -161,10 +161,12 @@ module Chem
161
161
 
162
162
  class SmilesAtom
163
163
  include Atom
164
- attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :smiles_pos, :is_aromatic
164
+ attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :is_aromatic, :smiles_pos
165
+
165
166
  def inspect
166
- "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join : ""]
167
+ "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join("-") : ""]
167
168
  end
169
+
168
170
  end
169
171
 
170
172
  class SmilesBond
@@ -187,14 +189,14 @@ module Chem
187
189
  return if to.bond == '.'
188
190
  bond = SmilesBond.new
189
191
 
190
- case to.bond
191
- when '='
192
- bond.v = 2
193
- when '#'
194
- bond.v = 3
195
- else
196
- bond.v = 1
197
- end
192
+ bond.v = case to.bond
193
+ when '='
194
+ 2
195
+ when '#'
196
+ 3
197
+ else
198
+ 1
199
+ end
198
200
  @edges.push([bond, from, to])
199
201
  end
200
202
 
@@ -9,7 +9,7 @@ require 'racc/parser'
9
9
 
10
10
  class SmilesParser < Racc::Parser
11
11
 
12
- module_eval <<'..end smiles.ry modeval..id6e4480ccfa', 'smiles.ry', 84
12
+ module_eval <<'..end smiles.ry modeval..idae4de8f30f', 'smiles.ry', 84
13
13
 
14
14
  def next_token
15
15
  @q.shift
@@ -22,16 +22,15 @@ def construct mol, tree, prev = nil
22
22
  else
23
23
  mol.nodes.push(node)
24
24
  mol.join(prev, node) if prev
25
- # if node.pos != nil
26
25
  if node.smiles_pos
27
- node.smiles_pos.each do |smiles_pos|
28
- if @ring[smiles_pos]
29
- mol.join(node, @ring[smiles_pos])
26
+ node.smiles_pos.each do |sp|
27
+ if @ring[sp]
28
+ mol.join(node, @ring[sp])
30
29
  # re-use ring closure digits
31
30
  # Higher-numbered ring closures are not supported yet.
32
- @ring[smiles_pos] = nil
31
+ @ring[sp] = nil
33
32
  else
34
- @ring[smiles_pos] = node
33
+ @ring[sp] = node
35
34
  end
36
35
  end
37
36
  end
@@ -45,13 +44,15 @@ def initialize
45
44
  @ring = []
46
45
  end
47
46
 
48
- @@parser = SmilesParser.new
49
-
50
47
  def self.parse_smiles( smiles )
51
- @@parser.parse( smiles )
48
+ SmilesParser.new.parse( smiles)
52
49
  end
53
50
 
54
- ElementRegex = Regexp.new('\A(' + Chem::Number2Element.inject([]){|ret, el| ret.push(el.to_s)}.sort.reverse.join("|") + ')', Regexp::IGNORECASE)
51
+ ElementRegex = Regexp.new('\A(Cl|Br|B|C|N|O|P|S|F|I|H)', Regexp::IGNORECASE)
52
+ InorganicRegex = /\A\[([^\]]+)\]/
53
+ NumberRegex = /\A\d+/
54
+ BackSlashRegex = /\\/
55
+ OtherRegex = /\A./
55
56
 
56
57
  def parse( line )
57
58
  @q = []
@@ -59,18 +60,18 @@ def parse( line )
59
60
  line.strip!
60
61
  until line.empty? do
61
62
  case line
62
- when /\A\[([^\]]+)\]/
63
+ when InorganicRegex
63
64
  # [nH] : aromatic N-H
64
65
  @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
65
66
  when ElementRegex
66
67
  prop = {}
67
68
  prop[:is_aromatic] = (97 <= $&[0]) #and 122 < $&[0]
68
69
  @q.push [:SYMBOL, [$&.capitalize.intern , prop]]
69
- when /\A\d+/
70
+ when NumberRegex
70
71
  @q.push [:NUMBER, $& ]
71
- when /\\/
72
+ when BackSlashRegex
72
73
  @q.push [:BSLASH, :BSLASH]
73
- when /\A./
74
+ when OtherRegex
74
75
  @q.push [$&, $&]
75
76
  else
76
77
  raise RuntimeError, 'must not happen'
@@ -78,13 +79,12 @@ def parse( line )
78
79
  line = $'
79
80
  end
80
81
  @q.push [ :EOL, nil ]
81
- # p @q
82
82
  mol = Chem::SmilesMol.new
83
83
  construct(mol, do_parse)
84
84
  mol
85
85
  end
86
86
 
87
- ..end smiles.ry modeval..id6e4480ccfa
87
+ ..end smiles.ry modeval..idae4de8f30f
88
88
 
89
89
  ##### racc 1.4.4 generates ###
90
90
 
@@ -333,10 +333,12 @@ module Chem
333
333
 
334
334
  class SmilesAtom
335
335
  include Atom
336
- attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :smiles_pos, :is_aromatic
336
+ attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :is_aromatic, :smiles_pos
337
+
337
338
  def inspect
338
- "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join : ""]
339
+ "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join("-") : ""]
339
340
  end
341
+
340
342
  end
341
343
 
342
344
  class SmilesBond
@@ -359,14 +361,14 @@ module Chem
359
361
  return if to.bond == '.'
360
362
  bond = SmilesBond.new
361
363
 
362
- case to.bond
363
- when '='
364
- bond.v = 2
365
- when '#'
366
- bond.v = 3
367
- else
368
- bond.v = 1
369
- end
364
+ bond.v = case to.bond
365
+ when '='
366
+ 2
367
+ when '#'
368
+ 3
369
+ else
370
+ 1
371
+ end
370
372
  @edges.push([bond, from, to])
371
373
  end
372
374
 
@@ -0,0 +1,35 @@
1
+
2
+ module Chem
3
+
4
+ module Type
5
+ module GDType
6
+
7
+ def self.detect_file file
8
+ begin
9
+ require 'GD'
10
+ rescue LoadError
11
+ return false
12
+ end
13
+ ['.png', '.gif', '.jpg', '.jpeg', '.tiff'].include?(File.extname(file))
14
+ end
15
+
16
+ # ChemRuby will never parse PNG ;)
17
+ def self.parse file
18
+ raise NotImplementedError
19
+ end
20
+
21
+ def self.detect_type type
22
+ [:gd_png, :gd_gif, :gd_jpeg, :gd_tiff].include?(type)
23
+ end
24
+
25
+ def self.save mol, filename, params = {}
26
+ require 'chem/db/gd.rb'
27
+ GDWriter.save(mol, filename, params)
28
+ end
29
+
30
+ end
31
+ end
32
+
33
+ ChemTypeRegistry << Type::GDType
34
+
35
+ end
@@ -19,9 +19,9 @@ module Chem
19
19
  type == :gspan
20
20
  end
21
21
 
22
- def self.save mol, filename
22
+ def self.save mol, filename, params = {}
23
23
  # require 'chem/db/gspan.rb'
24
- Chem::GSpan.save(mol, filename)
24
+ Chem::GSpan.save(mol, filename, params)
25
25
  end
26
26
  end
27
27
  end
@@ -25,4 +25,23 @@ module Chem
25
25
 
26
26
  ChemTypeRegistry << Type::KCFType
27
27
 
28
+ module KCFRPairType
29
+
30
+ def self.detect_file file
31
+ File.extname(file) == '.kcf' && /A\d+/.match(file)
32
+ end
33
+
34
+ def self.parse file
35
+ require 'chem/db/kcf_rpair'
36
+ Chem::KCF::RPairMolecule.new(File.open(file))
37
+ end
38
+
39
+ def self.detect_type type
40
+ type == :kcf_rpair
41
+ end
42
+
43
+ end
44
+
45
+ ChemTypeRegistry << KCFRPairType
46
+
28
47
  end
@@ -14,6 +14,7 @@ module Chem
14
14
  end
15
15
 
16
16
  def self.parse file
17
+ require 'chem/db/kegg'
17
18
  Chem::KEGG::KeggReactionParser.new file
18
19
  end
19
20
 
@@ -35,6 +36,7 @@ module Chem
35
36
  end
36
37
 
37
38
  def self.parse file
39
+ require 'chem/db/kegg'
38
40
  Chem::KEGG::KeggReactionMapParser.new file
39
41
  end
40
42
 
@@ -20,7 +20,7 @@ module Chem
20
20
  end
21
21
 
22
22
  def self.save mol, filename, params = {}
23
- # require 'chem/db/mdl.rb'
23
+ require 'chem/db/mdl.rb'
24
24
  mol.save_as_mdl(filename)
25
25
  end
26
26
  end
@@ -5,6 +5,11 @@ module Chem
5
5
  module PNGType
6
6
 
7
7
  def self.detect_file file
8
+ begin
9
+ require 'RMagick'
10
+ rescue LoadError
11
+ return false
12
+ end
8
13
  ['.png', '.gif', '.jpg', '.jpeg', '.tiff'].include?(File.extname(file))
9
14
  end
10
15
 
@@ -18,7 +23,6 @@ module Chem
18
23
  end
19
24
 
20
25
  def self.save mol, filename, params = {}
21
- require 'RMagick'
22
26
  require 'chem/db/rmagick.rb'
23
27
  RMagickWriter.save(mol, filename, params)
24
28
  end
@@ -0,0 +1,22 @@
1
+
2
+ module Chem
3
+
4
+ module Type
5
+
6
+ module RDFType
7
+ def self.detect_file file
8
+ File.extname(file) == '.rdf'
9
+ end
10
+
11
+ def self.parse file
12
+ require 'chem/db/mdl.rb'
13
+ end
14
+
15
+ def self.detect_type type
16
+ type == :rdf
17
+ end
18
+ end
19
+
20
+ end
21
+
22
+ end