chemruby 0.9.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
@@ -1,146 +0,0 @@
1
-
2
- require 'chem'
3
- require 'dbm'
4
-
5
- module Graph
6
-
7
- class SubGraphDB
8
-
9
- # Create Database object
10
- # @idx : index database for @mat and @typ
11
- # also stores number of nodes.
12
- # @mat : adjacency matrix database
13
- # @typ : node type database
14
- # @dbm : property database
15
- def initialize dbname, mode = "w"
16
- @dbm = DBM.open("#{dbname}.dbm")
17
- @idx = open("#{dbname}.idx", mode)
18
- @mat = open("#{dbname}.mat", mode)
19
- @typ = open("#{dbname}.typ", mode)
20
- end
21
-
22
- def self.open dbname, mode = "r"
23
- if mode == "r"
24
- SubGraphDB.new(dbname, mode)
25
- else
26
- self.new(dbname, mode)
27
- end
28
- end
29
-
30
- def []= key, mol
31
- adj = mol.adj_matrix
32
- @idx.print [mol.nodes.length, @mat.tell, adj.length].pack("i*")
33
- @mat.print adj
34
- @typ.print mol.nodes.inject([]){ |ret, node| ret.push(node.atomic_number)}.pack("i*")
35
- end
36
-
37
- # Closes database
38
- def close
39
- @dbm.close
40
- @idx.close
41
- @mat.close
42
- @typ.close
43
- end
44
-
45
- # Searches molecule from database
46
- # Example:
47
- # db = SubGraphDB.open("somewhere/dbname")
48
- # db.search(SMILES("CCCC"))
49
- def search mol
50
- @idx.rewind
51
- i = 1
52
- until @idx.eof?
53
- n_nodes, mat, len_matrix = @idx.read(4 * 3).unpack("i*")
54
- m = [0xff].pack("c") * 100
55
- open("test.bin", "w").puts m
56
-
57
- matrix = read_mat(mat, len_matrix)
58
- #SubGraphDB.show(m, mol.nodes.length, n_nodes)
59
- if SubGraphDB.match(matrix, n_nodes, mol.adjacency_list, mol.nodes.length, m)
60
- puts "C%05d" % i
61
- end
62
- i += 1
63
- end
64
- end
65
-
66
- private
67
- def read_mat idx, len_matrix
68
- @mat.seek(idx)
69
- @mat.read(len_matrix)
70
- end
71
-
72
- end
73
-
74
- end
75
-
76
- __END__
77
- require 'chem/utils/subgraph/subcomp'
78
-
79
- module Chem
80
- def self.open_db db_name
81
- GraphDB.new(db_name)
82
- end
83
- end
84
-
85
- class GraphDB
86
-
87
- # def self.open db_name
88
- # self.new(db_name)
89
- # end
90
-
91
- # def initialize db_name, mode="w"
92
- # if mode == "a"
93
- # @idx = File.open("#{db_name}.idx", "r+")
94
- # else
95
- # @idx = File.open("#{db_name}.idx", mode)
96
- # end
97
- # @typ = File.open("#{db_name}.typ", mode)
98
- # @dat = File.open("#{db_name}.dat", mode)
99
- # @num = File.open("#{db_name}.num", mode)
100
- # @typ.print("NodeType ISAM")
101
- # @dat.print("Adjacency ISAM")
102
- # end
103
-
104
- def insert mol
105
- n_bytes = mol.nodes.length / ($ARC * 8.0)
106
-
107
- # @typ.flush
108
- # @dat.flush
109
- # @idx.flush
110
-
111
- # record_num = @idx.tell
112
- # @idx.print [@typ.tell, @dat.tell].pack("ii")
113
-
114
- # @typ.print [mol.nodes.length, mol.edges.length].pack("ii")
115
- # @dat.print [n_bytes.ceil].pack("i")
116
-
117
- # @typ.print mol.nodes.collect { |node|
118
- # Chem::Element2Number[node.element]
119
- # }.pack("i*")
120
-
121
- # mol.nodes.each do |k|
122
- # atom_type = mol.atoms[k].setup_graph(i)
123
- # @typ.print [atom_type].pack("i")
124
- # end
125
-
126
-
127
- # @dat.print [n_bytes.ceil].pack("i")
128
- # j = 0
129
- # mol.atoms.keys.sort.each do |k|
130
- # 0.upto(n_bytes.ceil - 1) do |o|
131
- # i = 0
132
- # mol.atoms[k].set_neighbor
133
- # 0.upto($ARC * 8 - 1) do |m|
134
- # i += mol.atoms[k].neighbor.include?(mol.atoms[m + 8 * $ARC * o + 1]) ? 2**m : 0
135
- # end
136
- # @dat.print [i].pack("L")
137
- # end
138
- # j += 1
139
- # end
140
- # #p mol.adjacency_list
141
- # mol.connection.each do |c|
142
- # puts "%040b" % c
143
- # end
144
- #record_num
145
- end
146
- end
@@ -1,18 +0,0 @@
1
- # $Id: test_sssr.rb 65 2005-10-25 17:17:36Z tanaka $
2
-
3
- require 'chem/utils/sssr'
4
-
5
- require 'test/all'
6
- require 'test/ctab_test'
7
-
8
- class SssrTest < Test::Unit::TestCase
9
-
10
- def setup
11
- end
12
-
13
- def test_coronen
14
- coronen = Chem.open_mol($data_dir + '/A_21/coronen.mol')
15
- coronen.find_sssr
16
- end
17
-
18
- end
@@ -1,37 +0,0 @@
1
- # $Id: test_subcomp.rb 101 2006-01-06 22:26:49Z tanaka $
2
-
3
- require 'test/unit'
4
- require 'test/all'
5
-
6
- require 'chem'
7
-
8
- class SubcompTest < Test::Unit::TestCase
9
-
10
- def setup
11
- @mol = Chem.open_mol($data_dir + 'hypericin.mol')
12
- end
13
-
14
- # def test_mol
15
- # mol = Chem.open_mol($data_dir + 'hypericin.mol')
16
- # #assert_equal(mol.nodes.length, mol.match_by_ullmann(mol).length)
17
- # end
18
-
19
- # def test_match_by_atom
20
- # mol2 = Chem.parse_smiles("CCCBrCC")
21
- # mol = Chem.parse_smiles("CCBr")
22
- # assert_equal(mol.nodes.length, mol.match_by_ullmann(mol2).length);
23
- # end
24
-
25
- def test_search
26
- @mol.match_by_ullmann(@mol)
27
- end
28
-
29
- # def test_exhaustive
30
- # cyclopropane = SMILES("C1CCC1")
31
- # match = cyclopropane.match_by_ullmann(cyclopropane)
32
- # assert_equal([0, 1, 2, 3], match)
33
- # match = cyclopropane.match_exhaustively(cyclopropane)
34
- # end
35
-
36
- end
37
-