chemruby 0.9.3 → 1.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127)
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
@@ -21,14 +21,16 @@ module Chem
21
21
  # Draws line
22
22
  # This method may be invoked from chem/db/vector.rb
23
23
  def line(from, to, color)
24
- @canvas.stroke("rgb(%f, %f, %f)" % color)
24
+ @canvas.stroke("rgb(%f, %f, %f)" % to_256(color))
25
25
  @canvas.line(from[0], from[1], to[0], to[1])
26
26
  end
27
27
 
28
28
  def fill(nodes, color, params = {})
29
- @canvas.fill("rgb(%f, %f, %f)" % params[:color]) if params[:color]
29
+ @canvas.stroke("rgb(%f, %f, %f)" % to_256(color))
30
+ @canvas.fill("rgb(%f, %f, %f)" % to_256(color)) if color
30
31
  path = nodes.inject([]){|ret, node| ret << node[0] ; ret << node[1]}
31
32
  @canvas.polygon(* path)
33
+ @canvas.fill("black")
32
34
  end
33
35
 
34
36
  def text(str, x, y, params = {})
@@ -36,7 +38,7 @@ module Chem
36
38
  metrics = @canvas.get_type_metrics(@img, str)
37
39
  @canvas.stroke('transparent')
38
40
  @canvas.pointsize(params[:pontsize]) if params[:pointsize]
39
- @canvas.fill("rgb(%f, %f, %f)" % params[:color]) if params[:color]
41
+ @canvas.fill("rgb(%f, %f, %f)" % to_256(params[:color])) if params[:color]
40
42
 
41
43
  @canvas.text(x - metrics.width / 2.0,
42
44
  y + metrics.height / 4.0,
@@ -4,6 +4,10 @@ module Chem
4
4
 
5
5
  module MDL
6
6
 
7
+ class MdlMolecule
8
+ attr_accessor :sdf_data
9
+ end
10
+
7
11
  class SdfParser
8
12
  include Enumerable
9
13
 
@@ -19,10 +23,32 @@ module Chem
19
23
  first_entry = true
20
24
  from = 0
21
25
  @input.each("$$$$") do |entry|
22
- from = entry.index("\n") + 1unless first_entry
26
+ from = entry.index("\n") + 1 unless first_entry
23
27
  first_entry = false
24
28
  next if entry[from..-1].length < 3
25
- yield MdlMolecule.parse_io(StringIO.new(entry[from..-1]))
29
+ molio = StringIO.new(entry[from..-1])
30
+ mol = MdlMolecule.parse_io(molio)
31
+ mol.sdf_data = {}
32
+ data_header = nil
33
+
34
+ molio.each do |line|
35
+ if line[0..0] == ">"
36
+ if /<([^>]+)>/.match(line)
37
+ data_header = $1
38
+ elsif /(DT\d+)/.match(line)
39
+ data_header = $1
40
+ end
41
+ mol.sdf_data[data_header] = []
42
+ elsif /^$/.match(line)
43
+ if mol.sdf_data[data_header].respond_to?(:join)
44
+ mol.sdf_data[data_header] = mol.sdf_data[data_header].join("\n")
45
+ end
46
+ # end of data
47
+ else
48
+ mol.sdf_data[data_header] << line.chop
49
+ end
50
+ end
51
+ yield mol
26
52
  end
27
53
 
28
54
  end
@@ -93,16 +93,15 @@ def construct mol, tree, prev = nil
93
93
  else
94
94
  mol.nodes.push(node)
95
95
  mol.join(prev, node) if prev
96
- # if node.pos != nil
97
96
  if node.smiles_pos
98
- node.smiles_pos.each do |smiles_pos|
99
- if @ring[smiles_pos]
100
- mol.join(node, @ring[smiles_pos])
97
+ node.smiles_pos.each do |sp|
98
+ if @ring[sp]
99
+ mol.join(node, @ring[sp])
101
100
  # re-use ring closure digits
102
101
  # Higher-numbered ring closures are not supported yet.
103
- @ring[smiles_pos] = nil
102
+ @ring[sp] = nil
104
103
  else
105
- @ring[smiles_pos] = node
104
+ @ring[sp] = node
106
105
  end
107
106
  end
108
107
  end
@@ -116,13 +115,15 @@ def initialize
116
115
  @ring = []
117
116
  end
118
117
 
119
- @@parser = SmilesParser.new
120
-
121
118
  def self.parse_smiles( smiles )
122
- @@parser.parse( smiles )
119
+ SmilesParser.new.parse( smiles)
123
120
  end
124
121
 
125
- ElementRegex = Regexp.new('\A(' + Chem::Number2Element.inject([]){|ret, el| ret.push(el.to_s)}.sort.reverse.join("|") + ')', Regexp::IGNORECASE)
122
+ ElementRegex = Regexp.new('\A(Cl|Br|B|C|N|O|P|S|F|I|H)', Regexp::IGNORECASE)
123
+ InorganicRegex = /\A\[([^\]]+)\]/
124
+ NumberRegex = /\A\d+/
125
+ BackSlashRegex = /\\/
126
+ OtherRegex = /\A./
126
127
 
127
128
  def parse( line )
128
129
  @q = []
@@ -130,18 +131,18 @@ def parse( line )
130
131
  line.strip!
131
132
  until line.empty? do
132
133
  case line
133
- when /\A\[([^\]]+)\]/
134
+ when InorganicRegex
134
135
  # [nH] : aromatic N-H
135
136
  @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
136
137
  when ElementRegex
137
138
  prop = {}
138
139
  prop[:is_aromatic] = (97 <= $&[0]) #and 122 < $&[0]
139
140
  @q.push [:SYMBOL, [$&.capitalize.intern , prop]]
140
- when /\A\d+/
141
+ when NumberRegex
141
142
  @q.push [:NUMBER, $& ]
142
- when /\\/
143
+ when BackSlashRegex
143
144
  @q.push [:BSLASH, :BSLASH]
144
- when /\A./
145
+ when OtherRegex
145
146
  @q.push [$&, $&]
146
147
  else
147
148
  raise RuntimeError, 'must not happen'
@@ -149,7 +150,6 @@ def parse( line )
149
150
  line = $'
150
151
  end
151
152
  @q.push [ :EOL, nil ]
152
- # p @q
153
153
  mol = Chem::SmilesMol.new
154
154
  construct(mol, do_parse)
155
155
  mol
@@ -161,10 +161,12 @@ module Chem
161
161
 
162
162
  class SmilesAtom
163
163
  include Atom
164
- attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :smiles_pos, :is_aromatic
164
+ attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :is_aromatic, :smiles_pos
165
+
165
166
  def inspect
166
- "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join : ""]
167
+ "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join("-") : ""]
167
168
  end
169
+
168
170
  end
169
171
 
170
172
  class SmilesBond
@@ -187,14 +189,14 @@ module Chem
187
189
  return if to.bond == '.'
188
190
  bond = SmilesBond.new
189
191
 
190
- case to.bond
191
- when '='
192
- bond.v = 2
193
- when '#'
194
- bond.v = 3
195
- else
196
- bond.v = 1
197
- end
192
+ bond.v = case to.bond
193
+ when '='
194
+ 2
195
+ when '#'
196
+ 3
197
+ else
198
+ 1
199
+ end
198
200
  @edges.push([bond, from, to])
199
201
  end
200
202
 
@@ -9,7 +9,7 @@ require 'racc/parser'
9
9
 
10
10
  class SmilesParser < Racc::Parser
11
11
 
12
- module_eval <<'..end smiles.ry modeval..id6e4480ccfa', 'smiles.ry', 84
12
+ module_eval <<'..end smiles.ry modeval..idae4de8f30f', 'smiles.ry', 84
13
13
 
14
14
  def next_token
15
15
  @q.shift
@@ -22,16 +22,15 @@ def construct mol, tree, prev = nil
22
22
  else
23
23
  mol.nodes.push(node)
24
24
  mol.join(prev, node) if prev
25
- # if node.pos != nil
26
25
  if node.smiles_pos
27
- node.smiles_pos.each do |smiles_pos|
28
- if @ring[smiles_pos]
29
- mol.join(node, @ring[smiles_pos])
26
+ node.smiles_pos.each do |sp|
27
+ if @ring[sp]
28
+ mol.join(node, @ring[sp])
30
29
  # re-use ring closure digits
31
30
  # Higher-numbered ring closures are not supported yet.
32
- @ring[smiles_pos] = nil
31
+ @ring[sp] = nil
33
32
  else
34
- @ring[smiles_pos] = node
33
+ @ring[sp] = node
35
34
  end
36
35
  end
37
36
  end
@@ -45,13 +44,15 @@ def initialize
45
44
  @ring = []
46
45
  end
47
46
 
48
- @@parser = SmilesParser.new
49
-
50
47
  def self.parse_smiles( smiles )
51
- @@parser.parse( smiles )
48
+ SmilesParser.new.parse( smiles)
52
49
  end
53
50
 
54
- ElementRegex = Regexp.new('\A(' + Chem::Number2Element.inject([]){|ret, el| ret.push(el.to_s)}.sort.reverse.join("|") + ')', Regexp::IGNORECASE)
51
+ ElementRegex = Regexp.new('\A(Cl|Br|B|C|N|O|P|S|F|I|H)', Regexp::IGNORECASE)
52
+ InorganicRegex = /\A\[([^\]]+)\]/
53
+ NumberRegex = /\A\d+/
54
+ BackSlashRegex = /\\/
55
+ OtherRegex = /\A./
55
56
 
56
57
  def parse( line )
57
58
  @q = []
@@ -59,18 +60,18 @@ def parse( line )
59
60
  line.strip!
60
61
  until line.empty? do
61
62
  case line
62
- when /\A\[([^\]]+)\]/
63
+ when InorganicRegex
63
64
  # [nH] : aromatic N-H
64
65
  @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
65
66
  when ElementRegex
66
67
  prop = {}
67
68
  prop[:is_aromatic] = (97 <= $&[0]) #and 122 < $&[0]
68
69
  @q.push [:SYMBOL, [$&.capitalize.intern , prop]]
69
- when /\A\d+/
70
+ when NumberRegex
70
71
  @q.push [:NUMBER, $& ]
71
- when /\\/
72
+ when BackSlashRegex
72
73
  @q.push [:BSLASH, :BSLASH]
73
- when /\A./
74
+ when OtherRegex
74
75
  @q.push [$&, $&]
75
76
  else
76
77
  raise RuntimeError, 'must not happen'
@@ -78,13 +79,12 @@ def parse( line )
78
79
  line = $'
79
80
  end
80
81
  @q.push [ :EOL, nil ]
81
- # p @q
82
82
  mol = Chem::SmilesMol.new
83
83
  construct(mol, do_parse)
84
84
  mol
85
85
  end
86
86
 
87
- ..end smiles.ry modeval..id6e4480ccfa
87
+ ..end smiles.ry modeval..idae4de8f30f
88
88
 
89
89
  ##### racc 1.4.4 generates ###
90
90
 
@@ -333,10 +333,12 @@ module Chem
333
333
 
334
334
  class SmilesAtom
335
335
  include Atom
336
- attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :smiles_pos, :is_aromatic
336
+ attr_accessor :bond, :element, :element, :chiral, :hydrogen_count, :charge, :is_aromatic, :smiles_pos
337
+
337
338
  def inspect
338
- "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join : ""]
339
+ "{%s %s(%s)}" % [@bond, @element, smiles_pos ? smiles_pos.join("-") : ""]
339
340
  end
341
+
340
342
  end
341
343
 
342
344
  class SmilesBond
@@ -359,14 +361,14 @@ module Chem
359
361
  return if to.bond == '.'
360
362
  bond = SmilesBond.new
361
363
 
362
- case to.bond
363
- when '='
364
- bond.v = 2
365
- when '#'
366
- bond.v = 3
367
- else
368
- bond.v = 1
369
- end
364
+ bond.v = case to.bond
365
+ when '='
366
+ 2
367
+ when '#'
368
+ 3
369
+ else
370
+ 1
371
+ end
370
372
  @edges.push([bond, from, to])
371
373
  end
372
374
 
@@ -0,0 +1,35 @@
1
+
2
+ module Chem
3
+
4
+ module Type
5
+ module GDType
6
+
7
+ def self.detect_file file
8
+ begin
9
+ require 'GD'
10
+ rescue LoadError
11
+ return false
12
+ end
13
+ ['.png', '.gif', '.jpg', '.jpeg', '.tiff'].include?(File.extname(file))
14
+ end
15
+
16
+ # ChemRuby will never parse PNG ;)
17
+ def self.parse file
18
+ raise NotImplementedError
19
+ end
20
+
21
+ def self.detect_type type
22
+ [:gd_png, :gd_gif, :gd_jpeg, :gd_tiff].include?(type)
23
+ end
24
+
25
+ def self.save mol, filename, params = {}
26
+ require 'chem/db/gd.rb'
27
+ GDWriter.save(mol, filename, params)
28
+ end
29
+
30
+ end
31
+ end
32
+
33
+ ChemTypeRegistry << Type::GDType
34
+
35
+ end
@@ -19,9 +19,9 @@ module Chem
19
19
  type == :gspan
20
20
  end
21
21
 
22
- def self.save mol, filename
22
+ def self.save mol, filename, params = {}
23
23
  # require 'chem/db/gspan.rb'
24
- Chem::GSpan.save(mol, filename)
24
+ Chem::GSpan.save(mol, filename, params)
25
25
  end
26
26
  end
27
27
  end
@@ -25,4 +25,23 @@ module Chem
25
25
 
26
26
  ChemTypeRegistry << Type::KCFType
27
27
 
28
+ module KCFRPairType
29
+
30
+ def self.detect_file file
31
+ File.extname(file) == '.kcf' && /A\d+/.match(file)
32
+ end
33
+
34
+ def self.parse file
35
+ require 'chem/db/kcf_rpair'
36
+ Chem::KCF::RPairMolecule.new(File.open(file))
37
+ end
38
+
39
+ def self.detect_type type
40
+ type == :kcf_rpair
41
+ end
42
+
43
+ end
44
+
45
+ ChemTypeRegistry << KCFRPairType
46
+
28
47
  end
@@ -14,6 +14,7 @@ module Chem
14
14
  end
15
15
 
16
16
  def self.parse file
17
+ require 'chem/db/kegg'
17
18
  Chem::KEGG::KeggReactionParser.new file
18
19
  end
19
20
 
@@ -35,6 +36,7 @@ module Chem
35
36
  end
36
37
 
37
38
  def self.parse file
39
+ require 'chem/db/kegg'
38
40
  Chem::KEGG::KeggReactionMapParser.new file
39
41
  end
40
42
 
@@ -20,7 +20,7 @@ module Chem
20
20
  end
21
21
 
22
22
  def self.save mol, filename, params = {}
23
- # require 'chem/db/mdl.rb'
23
+ require 'chem/db/mdl.rb'
24
24
  mol.save_as_mdl(filename)
25
25
  end
26
26
  end
@@ -5,6 +5,11 @@ module Chem
5
5
  module PNGType
6
6
 
7
7
  def self.detect_file file
8
+ begin
9
+ require 'RMagick'
10
+ rescue LoadError
11
+ return false
12
+ end
8
13
  ['.png', '.gif', '.jpg', '.jpeg', '.tiff'].include?(File.extname(file))
9
14
  end
10
15
 
@@ -18,7 +23,6 @@ module Chem
18
23
  end
19
24
 
20
25
  def self.save mol, filename, params = {}
21
- require 'RMagick'
22
26
  require 'chem/db/rmagick.rb'
23
27
  RMagickWriter.save(mol, filename, params)
24
28
  end
@@ -0,0 +1,22 @@
1
+
2
+ module Chem
3
+
4
+ module Type
5
+
6
+ module RDFType
7
+ def self.detect_file file
8
+ File.extname(file) == '.rdf'
9
+ end
10
+
11
+ def self.parse file
12
+ require 'chem/db/mdl.rb'
13
+ end
14
+
15
+ def self.detect_type type
16
+ type == :rdf
17
+ end
18
+ end
19
+
20
+ end
21
+
22
+ end