chemruby 0.9.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
data/README CHANGED
@@ -52,7 +52,7 @@ For testing and developing ChemRuby:
52
52
 
53
53
  == INSTALL
54
54
 
55
- In the chemruby source directory (such as chemruby-x.x.x/), run install.rb
55
+ In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
56
56
  as follows:
57
57
 
58
58
  % ruby setup.rb config
@@ -109,7 +109,7 @@ Note that, setup.rb included in the ChemRuby package comes from
109
109
 
110
110
  License of This README file can be also distributed under the Ruby's license.
111
111
 
112
- Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
112
+ Copyright (C) 2006 TANAKA Nobuya <t@chemruby.org>
113
113
  KATAYAMA Toshiaki <k@bioruby.org>
114
114
 
115
115
  == CONTACT
data/Rakefile CHANGED
@@ -8,12 +8,15 @@
8
8
 
9
9
  require 'rake/clean'
10
10
  require 'rake/testtask'
11
- require 'rake/gempackagetask'
11
+
12
+ require "rake/gempackagetask"
13
+ require 'rubygems'
12
14
 
13
15
  task :default => [:help]
14
16
 
15
- PKG_VERSION = "0.9.3"
16
17
  PKG_BUILD = "RC1"
18
+ PKG_VERSION = "1.1.9"
19
+
17
20
 
18
21
  PKG_FILES = FileList[
19
22
  "Rakefile", "README", #"ChangeLog", "Releases", "TODO",
@@ -26,6 +29,7 @@ PKG_FILES = FileList[
26
29
  "lib/**/*.rb",
27
30
  "lib/**/*.ry",
28
31
  "test/**/*",
32
+ "temp/",
29
33
  "sample/**/*.rb",
30
34
  "sample/**/*.mol",
31
35
  "ext/**/*.h",
@@ -37,76 +41,30 @@ PKG_FILES = FileList[
37
41
  # "test/**/*"
38
42
  ]
39
43
 
40
- task :help do |t|
41
- puts <<EOL
42
-
43
- ChemRuby #{PKG_VERSION}
44
-
45
- To install ChemRuby, you need at least
46
-
47
- * ruby-1.8.2 (or later)
48
- * Ruby header files (included in original Ruby)
49
- * C language compilers (such as gcc)
50
-
51
- If the following modules are installed, ChemRuby will use it.
52
- You can install them later.
53
-
54
- * RMagick ( You will find how to install them in http://www.chemruby.org)
55
-
56
- == Compiling and Installing
57
-
58
- % rake compile
59
- % sudo rake install
60
-
61
- or just
62
-
63
- % sudo ruby setup.rb
64
-
65
- == Compiling RDOC
66
-
67
- % rake doc
68
-
69
- == Test
70
-
71
- % rake test
72
-
73
- You will need RMagick and other libraries to pass all the tests.
74
-
75
- EOL
76
-
77
- end
78
-
79
44
  task :doc do |t|
80
45
  system "rdoc --main README ./lib README"
81
46
  end
82
47
 
83
48
 
84
- task :dev => [:test]
49
+ task :dev => [:compile]
85
50
  Rake::TestTask.new(:dev) do |t|
86
51
  t.libs << File.join('ext')
87
52
  t.libs << File.join('lib')
88
- t.libs << File.join('dev/lib')
89
- t.libs << File.join('dev/ext')
90
- # cd 'dev/ext/chem/db/inchi/' do
91
- # ruby %{extconf.rb}
92
- # sh "make"
93
- # end
94
- t.test_files = FileList['dev/test/test*.rb']
53
+ t.test_files = FileList['test/ts_current.rb']
95
54
  end
96
55
 
97
56
  task :test => [:compile]
98
57
  Rake::TestTask.new(:test) do |t|
99
58
  t.libs << File.join('ext')
100
59
  t.libs << File.join('lib')
101
- t.test_files = FileList['test/test*.rb']
60
+ t.test_files = FileList['test/ts_main.rb']
102
61
  end
103
62
 
104
63
  task :light => [:compile]
105
64
  Rake::TestTask.new(:light) do |t|
106
65
  t.libs << File.join('ext')
107
66
  t.libs << File.join('lib')
108
- t.test_files = FileList['test/test_subcomp.rb']
109
- #'test/test_kegg.rb'# 'test/test_kcf_glycan.rb' #FileList['test/test_canonical_smiles.rb']
67
+ t.test_files = FileList['test/tc_sssr.rb']
110
68
  end
111
69
 
112
70
  task :rm do
@@ -179,17 +137,63 @@ end
179
137
  desc "Compiling library"
180
138
  task :compile => ['lib/chem/db/smiles/smiparser.rb', 'lib/chem/db/iupac/iuparser.rb', 'lib/chem/db/linucs/linparser.rb', "ext/subcomp.#{Config::CONFIG["DLEXT"]}"]
181
139
 
182
- spec = Gem::Specification.new do |s|
183
- s.name = 'chemruby'
184
- s.version = PKG_VERSION
185
- s.require_path = 'lib'
186
- s.autorequire = 'chem'
187
- s.files = PKG_FILES
188
- s.extensions << 'ext/extconf.rb'
189
- s.summary = "A framework program for cheminformatics"
140
+ begin
141
+ require 'rake/gempackagetask'
142
+
143
+ spec = Gem::Specification.new do |s|
144
+ s.name = 'chemruby'
145
+ s.version = PKG_VERSION
146
+ s.require_path = 'lib'
147
+ s.autorequire = 'chem'
148
+ s.files = PKG_FILES
149
+ s.extensions << 'ext/extconf.rb'
150
+ s.summary = "A framework program for cheminformatics"
151
+ end
152
+
153
+ Rake::GemPackageTask.new(spec) do |pkg|
154
+ pkg.need_tar = true
155
+ pkg.need_tar_gz = true
156
+ pkg.package_files += PKG_FILES
157
+ end
158
+ rescue
159
+ puts 'Install RubyGems to make gem'
190
160
  end
191
161
 
192
- Rake::GemPackageTask.new(spec) do |pkg|
193
- pkg.need_tar = true
194
- pkg.package_files += PKG_FILES
162
+ task :help do |t|
163
+ puts <<EOL
164
+
165
+ ChemRuby #{PKG_VERSION}
166
+
167
+ To install ChemRuby, you need at least
168
+
169
+ * ruby-1.8.2 (or later)
170
+ * Ruby header files (included in original Ruby)
171
+ * C language compilers (such as gcc)
172
+
173
+ If the following modules are installed, ChemRuby will use it.
174
+ You can install them later.
175
+
176
+ * RMagick ( You will find how to install them in http://www.chemruby.org)
177
+
178
+ == Compiling and Installing
179
+
180
+ % rake compile
181
+ % sudo rake install
182
+
183
+ or just
184
+
185
+ % sudo ruby setup.rb
186
+
187
+ == Compiling RDOC
188
+
189
+ % rake doc
190
+
191
+ == Test
192
+
193
+ % rake test
194
+
195
+ You will need RMagick and other libraries to pass all the tests.
196
+
197
+ EOL
198
+
195
199
  end
@@ -1,4 +1,6 @@
1
1
 
2
2
  require 'mkmf'
3
3
 
4
+ # $CFLAGS = " -g -lefence"
5
+
4
6
  create_makefile("subcomp")
@@ -4,413 +4,554 @@
4
4
 
5
5
  $Author: nobyt $
6
6
 
7
- Copyright (C) 2004-2006 Nobuya Tanaka
7
+ Copyright (C) 2004-2007 Nobuya Tanaka
8
8
 
9
9
  **********************************************************************/
10
10
 
11
- #define FULL 0xffffffff
12
- #define ZERO 0x0
13
-
14
- #define FAIL 0;
15
- #define SUCCESS 1;
16
-
17
11
  #include <ruby.h>
12
+ // #include "bitdb.h"
13
+ #include "utils.h"
18
14
 
19
15
  static void
20
- show(long *m, int pa, int pb)
21
- {
22
- int i, j, k;
23
- static int count = 0;
24
- int n_words;
25
-
26
- n_words = (pb - 1) / (sizeof(int) * 8) + 1;
16
+ show(long * l, int h, int w){
17
+ int i, j;
18
+ int counter = 0;
19
+ int n_bytes;
27
20
 
28
- //printf("count : %3d\n", count++);
21
+ n_bytes = NBYTES(w);
29
22
 
30
- printf("\n ");
31
- for(i = 0 ; i < pb ; i++){
23
+ printf(" ");
24
+ for(i = 0 ; i < w ; i++){
32
25
  printf("%d", i % 10);
33
26
  }
34
27
  printf("\n");
35
- for(i = 0 ; i < pa * n_words ; i += n_words){
36
- printf("%d ", (i / n_words) % 10);
37
- for(k = 0 ; k < n_words ; k++){
38
- for(j = k * 32 ; j < ((k + 1) * 32 < pb ? (k + 1) * 32 : pb) ; j++){
39
- if(m[i + k ] & (1 << (j - k * 32)))
40
- printf("@");
41
- else
42
- printf(".");
43
- }
44
- //printf(" ");
28
+
29
+ for(i = 0 ; i < h ; i++){
30
+ printf("%3d ", i);
31
+ for(j = 0 ; j < n_bytes ; j++){
32
+ dump_long(l[counter], (j == n_bytes - 1) ? ((w - 1) % ARCH + 1) : ARCH);
33
+ counter++;
45
34
  }
46
35
  printf("\n");
47
36
  }
48
- printf("\n");
49
37
  }
50
38
 
51
- /*
52
- * call-seq:
53
- * SubGraphDB.show -> print out adjacency matrix
54
- *
55
- * This function is mainly for debug.
56
- */
57
-
58
- static VALUE
59
- subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
39
+ static FILE *
40
+ db_file_open(const char * filename, const char * extension)
60
41
  {
61
- printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
62
- show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
63
- return Qnil;
42
+ FILE * fp;
43
+ char new_filename[50];
44
+
45
+ strncpy(new_filename, filename, sizeof(new_filename) - 5);
46
+ strncat(new_filename, extension, sizeof(new_filename) - strlen(extension) - 1);
47
+
48
+ fp = fopen(new_filename, "r");
49
+
50
+ if(fp == NULL){
51
+ rb_raise(rb_eException, "File can not open");
52
+ }
53
+ return fp;
64
54
  }
65
55
 
56
+ struct CompoundDB{
57
+ FILE * mat;
58
+ FILE * idx;
59
+ FILE * typ;
60
+ };
66
61
 
67
- /*
68
- * returns number of trailing zero of m-bit
69
- */
70
- static int ntz_m(long *y, int pb){
71
- int i = 0;
72
- int n;
73
- long x;
62
+ struct Query{
63
+ int len;
64
+ int edge_len;
74
65
 
75
- n = 1;
66
+ long * type;
67
+ int ** ptr;
68
+ int * num;
69
+ int * idx;
70
+ };
76
71
 
77
- while(i < pb && y[i] == 0){
78
- n += 32;
79
- i++;
80
- }
72
+ struct Target{
73
+ int n_bits;
74
+ int n_bytes;
81
75
 
82
- x = y[i];
76
+ int max_length;
83
77
 
84
- if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
85
- if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
86
- if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
87
- if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
88
- return n - (x & 1);
89
- }
78
+ long * mat;
79
+ long * typ;
80
+ };
81
+
82
+ struct State{
83
+ int height;
84
+ int width;
85
+ int n_bytes;
86
+
87
+ int max_length;
88
+ int length;
89
+ long * mat;
90
+ int depth;
91
+
92
+ long * res;
93
+ int res_counter;
94
+ int res_max_len;
95
+ };
96
+
97
+ struct Record{
98
+ int n_bits;
99
+ int n_bytes;
100
+ int mat_pos;
101
+ int information;
102
+ };
90
103
 
91
- static int ntz(long x){
92
- int n;
104
+ query_dump(struct Query * query){
105
+ int i, j;
93
106
 
94
- if (x == 0) return (32);
95
- n = 1;
96
- if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
97
- if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
98
- if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
99
- if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
100
- return n - (x & 1);
107
+ for(i = 0 ; i < query->len ; i++){
108
+ for(j = 0 ; j < query->num[i] ; j++){
109
+ printf("query->ptr[%d][%d] = %d\n", i, j, query->ptr[i][j]);
110
+ }
111
+ }
101
112
  }
102
113
 
103
- static int ntz_n_words(long * x, int n_words){
104
- int i;
105
- int words = 0;
106
- for(i = 0 ; x[i] == 0 && i < n_words ; i++){
107
- words += 32;
114
+ static void
115
+ target_free_db(struct Target * target)
116
+ {
117
+ free(target->mat);
118
+ target->mat = NULL;
119
+ free(target->typ);
120
+ target->typ = NULL;
121
+ }
122
+
123
+ static void
124
+ target_setup_db(struct Target * target, struct Record * record)
125
+ {
126
+ target->n_bits = record->n_bits;
127
+ target->n_bytes = record->n_bytes;
128
+ if(target->max_length < (record->n_bits * record->n_bytes)){
129
+ if(target->max_length != 0){ target_free_db(target); }
130
+
131
+ target->mat = talloc(sizeof(long) * record->n_bits * record->n_bytes);
132
+ target->typ = talloc(sizeof(long) * record->n_bits);
133
+ target->max_length = record->n_bits * record->n_bytes;
108
134
  }
109
- return ntz(x[i]) + words;
110
135
  }
111
136
 
112
- long bit_mask[32] = {
113
- 0x1, 0x2, 0x4, 0x8,
114
- 0x10, 0x20, 0x40, 0x80,
115
- 0x100, 0x200, 0x400, 0x800,
116
- 0x1000, 0x2000, 0x4000, 0x8000,
117
- 0x10000, 0x20000, 0x40000, 0x80000,
118
- 0x100000, 0x200000, 0x400000, 0x800000,
119
- 0x1000000, 0x2000000, 0x4000000, 0x8000000,
120
- 0x10000000, 0x20000000, 0x40000000, 0x80000000,
121
- };
137
+ static void
138
+ state_push_result(struct State * state)
139
+ {
140
+ if(state->res_max_len < state->res_counter){
141
+ state->res_max_len = state->res_max_len * 2;
142
+ state->res = (long *) trealloc(state->res, state->res_max_len);
143
+ }
144
+ memcpy(state->res + state->res_counter * state->length * sizeof(long),
145
+ state->mat,
146
+ state->height * state->n_bytes * sizeof(long));
147
+ state->res_counter++;
148
+ }
122
149
 
123
- long reverse_bit[32] = {
124
- 0xfffffffe,
125
- 0xfffffffd,
126
- 0xfffffffb,
127
- 0xfffffff7,
128
- 0xffffffef,
129
- 0xffffffdf,
130
- 0xffffffbf,
131
- 0xffffff7f,
132
- 0xfffffeff,
133
- 0xfffffdff,
134
- 0xfffffbff,
135
- 0xfffff7ff,
136
- 0xffffefff,
137
- 0xffffdfff,
138
- 0xffffbfff,
139
- 0xffff7fff,
140
- 0xfffeffff,
141
- 0xfffdffff,
142
- 0xfffbffff,
143
- 0xfff7ffff,
144
- 0xffefffff,
145
- 0xffdfffff,
146
- 0xffbfffff,
147
- 0xff7fffff,
148
- 0xfeffffff,
149
- 0xfdffffff,
150
- 0xfbffffff,
151
- 0xf7ffffff,
152
- 0xefffffff,
153
- 0xdfffffff,
154
- 0xbfffffff,
155
- 0x7fffffff,
156
- };
150
+ static VALUE
151
+ state_get_result(struct State * state)
152
+ {
153
+ VALUE result_array;
154
+ VALUE tmp;
155
+ int i, j;
156
+ int counter;
157
+
158
+ result_array = rb_ary_new();
159
+
160
+ for(i = 0 ; i < state->res_counter ; i++){
161
+ tmp = rb_ary_new();
162
+ counter = i * state->n_bytes * state->height * sizeof(long);
163
+ for(j = 0 ; j < state->height ; j++){
164
+ rb_ary_push(tmp,
165
+ INT2FIX(m_ntz(state->res + counter + j * state->n_bytes,
166
+ state->n_bytes)));
167
+ }
168
+ rb_ary_push(result_array, tmp);
169
+ }
170
+ return result_array;
171
+ }
157
172
 
158
- //int matchN(ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb)
159
- static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
173
+ static void
174
+ state_free(struct State * state)
160
175
  {
161
- long * mm;// current matrix
162
- long f[1000];//which columns has been used at an intermediate state of computing
163
- long h[100];// pb < 100 * 32
176
+ free(state->mat);
177
+ free(state->res);
178
+ state->mat = NULL;
179
+ }
164
180
 
165
- int d;// depth for matrix
166
- int k;// width for matrix
167
- int dd;// depth of matrix in refinement step
168
- int kk;// width of matrix in refinement step
181
+ static void
182
+ state_allocate(struct State * state, struct Query * query, struct Target * target)
183
+ {
184
+ int i;
169
185
 
170
- int i, j;//temp
171
- long l;// temp
186
+ state->height = query->len;
187
+ state->width = target->n_bits;
188
+ state->n_bytes = target->n_bytes;
189
+ state->res_counter = 0;
172
190
 
173
- short vflag;//valid check flag
174
- int n_words;// number of words needed for storing 'pb' bits.
175
- long refine_mm;// pointer for mm(match matrix) used in refinment step.
191
+ if(state->max_length < query->len * target->n_bytes){
176
192
 
177
- d = k = 0;
178
- // start back track
179
- for(i = 0 ; i < (pb / 32 + 1) ; i++)
180
- h[i] = 0;
181
- for(i = 0 ; i < 10 ; i++)
182
- f[i] = 0;
193
+ if(state->max_length != 0){
194
+ printf("state->free called max_length : %d\n", state->max_length);
195
+ state_free(state);
196
+ }
183
197
 
184
- n_words = (pb - 1) / (sizeof(int) * 8) + 1;
198
+ state->mat = (long *)talloc((query->len + 2) *// Depth
199
+ target->n_bytes * // Width
200
+ state->height * // Height
201
+ sizeof(long)); // sizeof(long)
202
+
203
+ state->res_max_len = (query->len + 2) *// Depth
204
+ target->n_bytes * // Width
205
+ state->height * // Height
206
+ sizeof(long) * 100;
207
+ state->res = (long *)talloc(state->res_max_len); // sizeof(long)
208
+ state->max_length = query->len * target->n_bytes;
209
+ }
210
+ state->length = query->len * target->n_bytes;
211
+ state->depth = -1;
185
212
 
186
- /* show(b, pb, pb); */
187
- /* show(m, pa, pb); */
213
+ for(i = 0 ; i < state->length ; i++){ state->mat[i] = 0;}
214
+ }
188
215
 
189
- if( d == 0 && k == 0){
190
- k = ntz_n_words(m, n_words);
191
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
192
- }
193
- while(k <= pb && d <= pa){
194
- /* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
195
- if(d < 0){
196
- printf("d < 0 return \n");
197
- return FAIL;
216
+ static void
217
+ state_setup(struct State * state, struct Query * query, struct Target * target)
218
+ {
219
+ int i, j;
220
+ for(i = 0 ; i < query->len ; i++){
221
+ for(j = 0 ; j < target->n_bits ; j++){
222
+ if (query->type[i] == target->typ[j]){
223
+ BITON(state->mat, i, j, target->n_bytes);
224
+ }
198
225
  }
226
+ }
227
+ }
199
228
 
200
- // Idea for optimization :
201
- // instead of using following equation, just (mm = mm + len) and (mm = mm - len).
202
- mm = m + pa * (d + 1) * n_words;
203
- /* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
204
-
205
- //printf("ntz : %d\n", ntz(mm));
206
- //k = ntz(mm + d);
207
- // set (k, d) bit '1', clear k-column and d-row '0'
208
- /* printf("k : %d d: %d\n", k, d); */
209
- for(j = 0 ; j < n_words ; j++){
210
- if(j == (k / 32)){
211
- for(i = 0 ; i < pa ; i++){
212
- mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
213
- }
214
- mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
215
- }else{
216
- for(i = 0 ; i < pa ; i++){
217
- mm[i * n_words + j] = mm[(i - pa) * n_words + j];
218
- }
219
- mm[d * n_words + j] = ZERO;
229
+ static void
230
+ state_setup_block(struct State * state)
231
+ {
232
+ int i, j;
233
+ for(i = 0 ; i < state->height ; i++){
234
+ for(j = 0 ; j < state->width ; j++){
235
+ if (rb_yield_values(2, INT2FIX(i), INT2FIX(j))){
236
+ BITON(state->mat, i, j, state->n_bytes);
220
237
  }
221
238
  }
222
- // BEGIN
223
- /* show(mm, pa, pb); */
224
- // END
225
-
226
- // Refinement step
227
- // Hot Spot!!
228
- dd = kk = 0;
229
- /* printf("before refinement step \n"); */
230
- /* show(mm, pa, pb); */
231
-
232
- while(dd != pa){
233
- while(kk != pb){
234
- //Idea for optimization :
235
- //refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
236
-
237
- //Idea for optimization :
238
- // when mm is sparse there may be better algorithm
239
- // for searching '1' bit.
240
- if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
241
- // Following loop can be flattened
242
- for(i = 0 ; i < num_adj[dd] ; i++){
243
- l = 0;
244
- for(j = 0 ; j < n_words ; j++){
245
- l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
239
+ }
240
+ }
241
+
242
+ static void
243
+ state_push(struct State * state)
244
+ {
245
+ memmove(state->mat + state->length,
246
+ state->mat,
247
+ state->length * sizeof(long) );
248
+ state->mat += state->length;
249
+ state->depth++;
250
+ }
251
+
252
+ static void
253
+ state_pop(struct State * state)
254
+ {
255
+ state->mat -= state->length;
256
+ state->depth--;
257
+ }
258
+
259
+ inline static long
260
+ has_bit(long * mat, int height, int width, int n_bytes){
261
+ return (mat[height * n_bytes + width / ARCH] & (1 << (width % ARCH)));
262
+ }
263
+
264
+ /*
265
+ * Hot spot
266
+ */
267
+ inline static void
268
+ refine(struct State * state, struct Query * query, struct Target * target){
269
+ int i, j, k, l, m, bit_removed;
270
+ bit_removed = 1;
271
+ while(bit_removed){
272
+ bit_removed = 0;// false
273
+ for(i = 0 ; i < query->len ; i++){
274
+ for(j = 0 ; j < target->n_bits ; j++){
275
+ if(has_bit(state->mat, i, j, target->n_bytes)){
276
+ for(k = 0 ; k < query->num[i] ; k++){
277
+ m = 0;
278
+ for(l = 0 ; l < target->n_bytes ; l++){
279
+ if((state->mat[query->ptr[i][k] * target->n_bytes + l] &
280
+ target->mat[j * target->n_bytes + l]) != 0){
281
+ m++;
282
+ }
246
283
  }
247
- if(l == 0){
248
- mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
249
- /* break;//quit for loop */
284
+ if(m == 0){
285
+ BITOFF(state->mat, i, j, target->n_bytes);
286
+ bit_removed = 1;
250
287
  }
251
288
  }
252
-
253
289
  }
254
- kk++;
255
290
  }
256
- // Idea for optimization
257
- // every 32 bit is tested here.
258
- kk = 0;
259
- dd++;
260
291
  }
261
- /* show(mm, pa, pb); */
262
-
263
- //Checking whether match matrices are valid.
264
- // Subgraph isomorphism can be checked here before reaching d == pa.
265
- vflag = SUCCESS;
266
- for(i = 0 ; i < pa ; i++){
267
- l = 0;
268
- for(j = 0 ; j < n_words ; j++){
269
- l |= mm[i * n_words + j];
270
- }
271
- if(l == 0){
272
- vflag = FAIL;
273
- break;
292
+ }
293
+ }
294
+
295
+ static void
296
+ state_clear_bits(long * l, int h, int w, int n_bytes, int height){
297
+ int i;
298
+ for(i = 0 ; i < n_bytes ; i++){ l[i + h * n_bytes] = 0; }
299
+ for(i = 0 ; i < height ; i++){ BITOFF(l, i, w, n_bytes); }
300
+ BITON(l, h, w, n_bytes);
301
+ }
302
+
303
+ #define TRUE 1
304
+ #define FALSE 0
305
+
306
+ inline static int
307
+ state_is_valid(struct State * state){
308
+ int i, j, n_bytes, flag;
309
+ // n_bytes = NBYTES(state->length);
310
+ for(i = 0 ; i < state->height ; i++){
311
+ flag = 0;
312
+ for(j = 0 ; j < state->n_bytes ; j++){
313
+ if(state->mat[i * state->n_bytes + j] != 0){
314
+ flag++;
274
315
  }
275
316
  }
317
+ if(flag == 0)
318
+ return FALSE;
319
+ }
320
+ return TRUE;
321
+ }
276
322
 
277
- if(vflag){// Success
278
- f[d] = k;
279
- k = 0;
280
- while(h[k / 32] & bit_mask[k - (k / 32) * 32])
281
- k++;
282
- d++;
283
- if(d == pa){
284
- /* show(mm, pa, pb); */
285
- //printf("FOUND! d : %d\n", d);
286
- return SUCCESS;
287
- }
288
- else{
289
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
290
- }
291
- }else{//Failed
292
- h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
293
- k++;
294
- //printf("d : %d k : %d\n", d, k);
295
- while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
296
- (m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
297
- k < pb)
298
- k++;
299
- /* printf("d : %d k : %d\n", d, k); */
300
- while(k > pb){
301
- if(d == 0){
302
- return FAIL;
323
+ static void
324
+ search_by_ullmann(struct State * state, struct Query * query, struct Target * target){
325
+ int k;
326
+ // Idea for optimization
327
+ //show(state->mat, query->len, target->n_bits);
328
+ if(state->depth == state->height - 1){
329
+ //printf("FOUND!\n");
330
+ state_push_result(state);
331
+ //show(state->mat, query->len, target->n_bits);
332
+ }else{
333
+ for(k = 0 ; k < target->n_bits ; k++){
334
+ if(has_bit(state->mat,
335
+ state->depth + 1,
336
+ k,
337
+ target->n_bytes)){
338
+ state_push(state);
339
+ state_clear_bits(state->mat, state->depth, k, target->n_bytes, query->len);
340
+ //show(state->mat, query->len, target->n_bits);
341
+ refine(state, query, target);
342
+ //show(state->mat, query->len, target->n_bits);
343
+ if(state_is_valid(state) == TRUE){
344
+ //show(state->mat, query->len, target->n_bits);
345
+ search_by_ullmann(state, query, target);
303
346
  }
304
- d--;
305
- k = f[d];
306
- h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
307
- k++;
308
- while(h[k / 32] & bit_mask[k - (k / 32) * 32])
309
- k++;
347
+ state_pop(state);
310
348
  }
311
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
312
349
  }
313
350
  }
314
- //printf("d : %d k : %d FAIL!\n", d, k);
315
- return FAIL;
316
351
  }
317
352
 
318
- static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
319
- int i, j, n_words;
320
- int off_set = 0;
353
+ static void
354
+ db_load(struct CompoundDB * db, struct Query * query){
355
+
356
+ int new_n_bits;
357
+ int new_n_bytes;
358
+ int mat_ptr;
359
+
360
+ struct Target target;
361
+ struct State state;
362
+ struct Record record;
363
+
364
+ int i, j;
321
365
 
322
- n_words = (RARRAY(ret)->len - 1) / (sizeof(int) * 8) + 1;
366
+ target.n_bits = 0;
367
+ target.n_bytes = 0;
368
+ target.max_length = 0;
369
+ state.max_length = 0;
323
370
 
324
- for(i = 0 ; i < RARRAY(ret)->len ; i++){
325
- num_adj[i] = FIX2INT(rb_funcall(RARRAY(ret)->ptr[i], rb_intern("length"), 0));
326
- point[i] = adj + off_set;
327
- for(j = 0 ; j < RARRAY(RARRAY(ret)->ptr[i])->len ; j++){
328
- adj[off_set++] = FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]);
329
- //printf(" %d ", FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]));
371
+ for(;;){
372
+ if(feof(db->idx) || feof(db->mat) || feof(db->mat)){
373
+ printf("Database broken!\n");
374
+ return;
375
+ }
376
+
377
+ fread(& record, sizeof(struct Record), 1, db->idx);
378
+ if(record.n_bits == -1){
379
+ return;
380
+ }
381
+ target_setup_db(& target, & record);
382
+ if(record.information != -1){
383
+
384
+ fread(target.mat, sizeof(long), target.n_bits * target.n_bytes, db->mat);
385
+ fread(target.typ, sizeof(long), target.n_bits, db->typ);
386
+
387
+ state_allocate(& state, query, & target);
388
+ state_setup(& state, query, & target);
389
+ //show(state.mat, query->len, target.n_bits);
390
+ search_by_ullmann(& state, query, & target);
391
+ }else{
392
+ fread(target.typ, sizeof(long), target.n_bytes, db->typ);
393
+ printf("atom_number : %d\n", target.typ[0]);
330
394
  }
331
- //printf("\n");
332
395
  }
396
+ target_free_db(& target);
397
+ state_free(& state);
333
398
  }
334
399
 
335
- static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
336
- // variables for adjacency list of graph A
337
- int num_adj[1000];
338
- long * point[1000];
339
- long adj[3000];//adjacency list
400
+ static void
401
+ query_setup(VALUE mol, struct Query * query){
402
+ VALUE atom_type_str;
403
+ VALUE adj_index;
404
+ VALUE edges;
340
405
 
341
- // match matrix; = pa * (n_words * pa)
342
- long * mm;//[800000];
343
- long * m;
406
+ int i, j, k;
344
407
 
345
- //temporary variables
346
- int i;
347
- int result;
348
- VALUE mapping;
408
+ // allocating and setting atom type
409
+ atom_type_str = rb_funcall(mol, rb_intern("typ_str"), 0);
410
+ Check_Type(atom_type_str, T_STRING);
349
411
 
350
- int n_pb, n_pa;
351
- int n_words;
352
- int sizeof_mm;
412
+ query->len = RSTRING(atom_type_str)->len / sizeof(long);
413
+ query->type = (long *)talloc(query->len * sizeof(long));
414
+ memcpy(query->type, RSTRING(atom_type_str)->ptr, sizeof(long) * query->len);
353
415
 
354
- n_pb = NUM2INT(pb);
355
- n_pa = NUM2INT(pa);
356
-
357
- if(n_pb > n_pa){
358
- return Qfalse;
359
- }
416
+ // allocatting and setting index
417
+ adj_index = rb_funcall(mol, rb_intern("adjacent_index"), 0);
418
+ Check_Type(adj_index, T_ARRAY);
360
419
 
361
- sizeof_mm = n_pa * (n_pb + 1) * n_words;
420
+ edges = rb_funcall(mol, rb_intern("edges"), 0);
421
+ Check_Type(edges, T_ARRAY);
362
422
 
363
- n_words = (n_pa - 1) / (sizeof(int) * 8) + 1;
423
+ query->edge_len = RARRAY(edges)->len;
364
424
 
365
- mm = (long * )malloc(sizeof(long) * 800000);
366
- if(RSTRING(match)->len > 800000 * sizeof(long))
367
- rb_raise(rb_eArgError, "Length of match matrix too short! %d", sizeof(mm));
368
-
369
- memcpy(mm, (long *)RSTRING(match)->ptr, RSTRING(match)->len); // BUG!!
425
+ query->ptr = (int **) talloc(query->len * sizeof(int **) );
426
+ query->num = (int * ) talloc(query->len * sizeof(int * ) );
427
+ query->idx = (int * ) talloc(query->edge_len * sizeof(int * ) * 2 );
370
428
 
371
- Check_Type(a_matrix, T_STRING);
429
+ k = 0;
430
+ for(i = 0 ; i < query->len ; i++){
431
+ Check_Type(rb_ary_entry(adj_index, i), T_ARRAY);
432
+ query->num[i] = RARRAY(rb_ary_entry(adj_index, i))->len;
433
+ query->ptr[i] = query->idx + k;
434
+ for(j = 0 ; j < query->num[i] ; j++){
435
+ Check_Type(rb_ary_entry(rb_ary_entry(adj_index, i), j), T_FIXNUM);
436
+ query->idx[k] = FIX2INT(rb_ary_entry(rb_ary_entry(adj_index, i), j));
437
+ k++;
438
+ }
439
+ }
372
440
 
373
- set_adjacency(num_adj, point, adj, other_adj);
441
+ }
374
442
 
375
- //show(mm, n_pa, n_pb);
376
- //show((long *)RSTRING(a_matrix)->ptr, n_pa, n_pa);
443
+ static void
444
+ query_free(struct Query * query){
445
+ free(query->type);
446
+ free(query->ptr);
447
+ free(query->num);
448
+ free(query->idx);
449
+
450
+ query->type = NULL;
451
+ query->ptr = NULL;
452
+ query->num = NULL;
453
+ query->idx = NULL;
454
+ }
377
455
 
378
- result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);
456
+ static VALUE
457
+ db_search(VALUE self, VALUE database_name, VALUE q_mol, VALUE block)
458
+ {
459
+ char * filename;
460
+ struct CompoundDB db;
461
+ struct Query query;
379
462
 
380
- if(result == 1){//?
381
- mapping = rb_ary_new();
382
- //printf("n_words : %d n_pa : %d n_pb : %d n_words * n_pa * n_pa : %d", n_words, n_pa, n_pb, n_words * n_pa * n_pa);
383
- //show(mm + n_words * n_pb * n_pb, n_pb, n_pa);
384
-
385
- for(i = 0 ; i < n_pb ; i++){
386
- rb_ary_push(mapping, INT2FIX(ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa)));
387
- }
388
- return mapping;
463
+ filename = StringValuePtr(database_name);
464
+
465
+ if(strlen(filename) > 40){
466
+ rb_raise(rb_eException, "length of database name must less than 40!");
389
467
  }
390
- return Qfalse;
468
+
469
+ query_setup(q_mol, & query);
470
+
471
+ db.mat = db_file_open(filename, ".mat");
472
+ db.idx = db_file_open(filename, ".idx");
473
+ db.typ = db_file_open(filename, ".typ");
474
+
475
+ db_load(& db, & query);
476
+
477
+ query_free(& query);
478
+
479
+ fclose(db.mat);
480
+ fclose(db.idx);
481
+ fclose(db.typ);
391
482
  }
392
483
 
393
- // DataBase for substructure search
484
+ static void
485
+ target_setup(VALUE t_mol, struct Target * target){
486
+ VALUE bit_mat;
487
+ VALUE bit_str;
488
+ VALUE atom_types;
394
489
 
395
- struct dbmdata {
396
- int di_size;
397
- };
490
+ int i;
491
+
492
+ atom_types = rb_funcall(t_mol, rb_intern("typ_str"), 0);
493
+ Check_Type(atom_types, T_STRING);
494
+
495
+ target->n_bits = RSTRING(atom_types)->len / sizeof(long);
496
+ target->typ = (long *)talloc(target->n_bits * sizeof(long));
497
+ memcpy(target->typ, RSTRING(atom_types)->ptr, target->n_bits * sizeof(long));
498
+
499
+ /*
500
+ * Set up adjacency matrix
501
+ */
502
+ bit_mat = rb_funcall(t_mol, rb_intern("bit_mat"), 0);
503
+ bit_str = rb_funcall(bit_mat, rb_intern("bit_str"), 0);
504
+
505
+ target->n_bytes = NBYTES(target->n_bits);
398
506
 
399
- static VALUE sdb_s_search(VALUE dbname){
400
- rb_p(dbname);
507
+ target->mat = (long *)talloc(target->n_bytes * target->n_bits * sizeof(long));
508
+ memcpy(target->mat, RSTRING(bit_str)->ptr, RSTRING(bit_str)->len);
401
509
  }
402
510
 
403
- Init_subcomp(){
404
- VALUE subcomp_cGraph;
405
- VALUE subcomp_cSubGraphDB;
511
+ static void
512
+ target_free(struct Target * target){
513
+ free(target->typ);
514
+ free(target->mat);
515
+ }
516
+
517
+ static VALUE
518
+ mol_by_mol(VALUE self, VALUE q_mol, VALUE t_mol)
519
+ {
520
+ struct Query query;
521
+ struct Target target;
522
+ struct State state;
523
+ VALUE result;
524
+
525
+ target.max_length = 0;
526
+ state.max_length = 0;
527
+
528
+ query_setup( q_mol, & query );
529
+ target_setup( t_mol, & target );
406
530
 
407
- subcomp_cGraph = rb_define_module("Graph");
408
- rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
531
+ state_allocate(& state, & query, & target);
409
532
 
410
- subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
533
+ if(rb_block_given_p() == Qtrue){
534
+ state_setup_block(& state);
535
+ }
536
+ else{
537
+ state_setup(& state, & query, & target);
538
+ }
539
+
540
+ search_by_ullmann(& state, & query, & target);
541
+ result = state_get_result(& state);
542
+
543
+ query_free(& query);
544
+ target_free(& target);
545
+ state_free(& state);
546
+
547
+ return result;
548
+ }
411
549
 
412
- rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
550
+ void Init_subcomp(){
551
+ VALUE subcomp_mChem;
413
552
 
414
- rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
415
- rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
553
+ subcomp_mChem = rb_define_module("Chem");
554
+ rb_define_singleton_method(subcomp_mChem, "match_by_ullmann", mol_by_mol, 2);
555
+ rb_define_singleton_method(subcomp_mChem, "db_search", db_search, 2);
556
+ //define_bitdb_method();
416
557
  }