chemruby 0.9.3 → 1.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
data/README CHANGED
@@ -52,7 +52,7 @@ For testing and developing ChemRuby:
52
52
 
53
53
  == INSTALL
54
54
 
55
- In the chemruby source directory (such as chemruby-x.x.x/), run install.rb
55
+ In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
56
56
  as follows:
57
57
 
58
58
  % ruby setup.rb config
@@ -109,7 +109,7 @@ Note that, setup.rb included in the ChemRuby package comes from
109
109
 
110
110
  License of This README file can be also distributed under the Ruby's license.
111
111
 
112
- Copyright (C) 2006 TANAKA Nobuya <tanaka@chemruby.org>
112
+ Copyright (C) 2006 TANAKA Nobuya <t@chemruby.org>
113
113
  KATAYAMA Toshiaki <k@bioruby.org>
114
114
 
115
115
  == CONTACT
data/Rakefile CHANGED
@@ -8,12 +8,15 @@
8
8
 
9
9
  require 'rake/clean'
10
10
  require 'rake/testtask'
11
- require 'rake/gempackagetask'
11
+
12
+ require "rake/gempackagetask"
13
+ require 'rubygems'
12
14
 
13
15
  task :default => [:help]
14
16
 
15
- PKG_VERSION = "0.9.3"
16
17
  PKG_BUILD = "RC1"
18
+ PKG_VERSION = "1.1.9"
19
+
17
20
 
18
21
  PKG_FILES = FileList[
19
22
  "Rakefile", "README", #"ChangeLog", "Releases", "TODO",
@@ -26,6 +29,7 @@ PKG_FILES = FileList[
26
29
  "lib/**/*.rb",
27
30
  "lib/**/*.ry",
28
31
  "test/**/*",
32
+ "temp/",
29
33
  "sample/**/*.rb",
30
34
  "sample/**/*.mol",
31
35
  "ext/**/*.h",
@@ -37,76 +41,30 @@ PKG_FILES = FileList[
37
41
  # "test/**/*"
38
42
  ]
39
43
 
40
- task :help do |t|
41
- puts <<EOL
42
-
43
- ChemRuby #{PKG_VERSION}
44
-
45
- To install ChemRuby, you need at least
46
-
47
- * ruby-1.8.2 (or later)
48
- * Ruby header files (included in original Ruby)
49
- * C language compilers (such as gcc)
50
-
51
- If the following modules are installed, ChemRuby will use it.
52
- You can install them later.
53
-
54
- * RMagick ( You will find how to install them in http://www.chemruby.org)
55
-
56
- == Compiling and Installing
57
-
58
- % rake compile
59
- % sudo rake install
60
-
61
- or just
62
-
63
- % sudo ruby setup.rb
64
-
65
- == Compiling RDOC
66
-
67
- % rake doc
68
-
69
- == Test
70
-
71
- % rake test
72
-
73
- You will need RMagick and other libraries to pass all the tests.
74
-
75
- EOL
76
-
77
- end
78
-
79
44
  task :doc do |t|
80
45
  system "rdoc --main README ./lib README"
81
46
  end
82
47
 
83
48
 
84
- task :dev => [:test]
49
+ task :dev => [:compile]
85
50
  Rake::TestTask.new(:dev) do |t|
86
51
  t.libs << File.join('ext')
87
52
  t.libs << File.join('lib')
88
- t.libs << File.join('dev/lib')
89
- t.libs << File.join('dev/ext')
90
- # cd 'dev/ext/chem/db/inchi/' do
91
- # ruby %{extconf.rb}
92
- # sh "make"
93
- # end
94
- t.test_files = FileList['dev/test/test*.rb']
53
+ t.test_files = FileList['test/ts_current.rb']
95
54
  end
96
55
 
97
56
  task :test => [:compile]
98
57
  Rake::TestTask.new(:test) do |t|
99
58
  t.libs << File.join('ext')
100
59
  t.libs << File.join('lib')
101
- t.test_files = FileList['test/test*.rb']
60
+ t.test_files = FileList['test/ts_main.rb']
102
61
  end
103
62
 
104
63
  task :light => [:compile]
105
64
  Rake::TestTask.new(:light) do |t|
106
65
  t.libs << File.join('ext')
107
66
  t.libs << File.join('lib')
108
- t.test_files = FileList['test/test_subcomp.rb']
109
- #'test/test_kegg.rb'# 'test/test_kcf_glycan.rb' #FileList['test/test_canonical_smiles.rb']
67
+ t.test_files = FileList['test/tc_sssr.rb']
110
68
  end
111
69
 
112
70
  task :rm do
@@ -179,17 +137,63 @@ end
179
137
  desc "Compiling library"
180
138
  task :compile => ['lib/chem/db/smiles/smiparser.rb', 'lib/chem/db/iupac/iuparser.rb', 'lib/chem/db/linucs/linparser.rb', "ext/subcomp.#{Config::CONFIG["DLEXT"]}"]
181
139
 
182
- spec = Gem::Specification.new do |s|
183
- s.name = 'chemruby'
184
- s.version = PKG_VERSION
185
- s.require_path = 'lib'
186
- s.autorequire = 'chem'
187
- s.files = PKG_FILES
188
- s.extensions << 'ext/extconf.rb'
189
- s.summary = "A framework program for cheminformatics"
140
+ begin
141
+ require 'rake/gempackagetask'
142
+
143
+ spec = Gem::Specification.new do |s|
144
+ s.name = 'chemruby'
145
+ s.version = PKG_VERSION
146
+ s.require_path = 'lib'
147
+ s.autorequire = 'chem'
148
+ s.files = PKG_FILES
149
+ s.extensions << 'ext/extconf.rb'
150
+ s.summary = "A framework program for cheminformatics"
151
+ end
152
+
153
+ Rake::GemPackageTask.new(spec) do |pkg|
154
+ pkg.need_tar = true
155
+ pkg.need_tar_gz = true
156
+ pkg.package_files += PKG_FILES
157
+ end
158
+ rescue
159
+ puts 'Install RubyGems to make gem'
190
160
  end
191
161
 
192
- Rake::GemPackageTask.new(spec) do |pkg|
193
- pkg.need_tar = true
194
- pkg.package_files += PKG_FILES
162
+ task :help do |t|
163
+ puts <<EOL
164
+
165
+ ChemRuby #{PKG_VERSION}
166
+
167
+ To install ChemRuby, you need at least
168
+
169
+ * ruby-1.8.2 (or later)
170
+ * Ruby header files (included in original Ruby)
171
+ * C language compilers (such as gcc)
172
+
173
+ If the following modules are installed, ChemRuby will use it.
174
+ You can install them later.
175
+
176
+ * RMagick ( You will find how to install them in http://www.chemruby.org)
177
+
178
+ == Compiling and Installing
179
+
180
+ % rake compile
181
+ % sudo rake install
182
+
183
+ or just
184
+
185
+ % sudo ruby setup.rb
186
+
187
+ == Compiling RDOC
188
+
189
+ % rake doc
190
+
191
+ == Test
192
+
193
+ % rake test
194
+
195
+ You will need RMagick and other libraries to pass all the tests.
196
+
197
+ EOL
198
+
195
199
  end
@@ -1,4 +1,6 @@
1
1
 
2
2
  require 'mkmf'
3
3
 
4
+ # $CFLAGS = " -g -lefence"
5
+
4
6
  create_makefile("subcomp")
@@ -4,413 +4,554 @@
4
4
 
5
5
  $Author: nobyt $
6
6
 
7
- Copyright (C) 2004-2006 Nobuya Tanaka
7
+ Copyright (C) 2004-2007 Nobuya Tanaka
8
8
 
9
9
  **********************************************************************/
10
10
 
11
- #define FULL 0xffffffff
12
- #define ZERO 0x0
13
-
14
- #define FAIL 0;
15
- #define SUCCESS 1;
16
-
17
11
  #include <ruby.h>
12
+ // #include "bitdb.h"
13
+ #include "utils.h"
18
14
 
19
15
  static void
20
- show(long *m, int pa, int pb)
21
- {
22
- int i, j, k;
23
- static int count = 0;
24
- int n_words;
25
-
26
- n_words = (pb - 1) / (sizeof(int) * 8) + 1;
16
+ show(long * l, int h, int w){
17
+ int i, j;
18
+ int counter = 0;
19
+ int n_bytes;
27
20
 
28
- //printf("count : %3d\n", count++);
21
+ n_bytes = NBYTES(w);
29
22
 
30
- printf("\n ");
31
- for(i = 0 ; i < pb ; i++){
23
+ printf(" ");
24
+ for(i = 0 ; i < w ; i++){
32
25
  printf("%d", i % 10);
33
26
  }
34
27
  printf("\n");
35
- for(i = 0 ; i < pa * n_words ; i += n_words){
36
- printf("%d ", (i / n_words) % 10);
37
- for(k = 0 ; k < n_words ; k++){
38
- for(j = k * 32 ; j < ((k + 1) * 32 < pb ? (k + 1) * 32 : pb) ; j++){
39
- if(m[i + k ] & (1 << (j - k * 32)))
40
- printf("@");
41
- else
42
- printf(".");
43
- }
44
- //printf(" ");
28
+
29
+ for(i = 0 ; i < h ; i++){
30
+ printf("%3d ", i);
31
+ for(j = 0 ; j < n_bytes ; j++){
32
+ dump_long(l[counter], (j == n_bytes - 1) ? ((w - 1) % ARCH + 1) : ARCH);
33
+ counter++;
45
34
  }
46
35
  printf("\n");
47
36
  }
48
- printf("\n");
49
37
  }
50
38
 
51
- /*
52
- * call-seq:
53
- * SubGraphDB.show -> print out adjacency matrix
54
- *
55
- * This function is mainly for debug.
56
- */
57
-
58
- static VALUE
59
- subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
39
+ static FILE *
40
+ db_file_open(const char * filename, const char * extension)
60
41
  {
61
- printf("subcomp_show called %3d %3d\n", FIX2INT(pa), FIX2INT(pb));
62
- show((long * )RSTRING(str)->ptr, FIX2INT(pa), FIX2INT(pb));
63
- return Qnil;
42
+ FILE * fp;
43
+ char new_filename[50];
44
+
45
+ strncpy(new_filename, filename, sizeof(new_filename) - 5);
46
+ strncat(new_filename, extension, sizeof(new_filename) - strlen(extension) - 1);
47
+
48
+ fp = fopen(new_filename, "r");
49
+
50
+ if(fp == NULL){
51
+ rb_raise(rb_eException, "File can not open");
52
+ }
53
+ return fp;
64
54
  }
65
55
 
56
+ struct CompoundDB{
57
+ FILE * mat;
58
+ FILE * idx;
59
+ FILE * typ;
60
+ };
66
61
 
67
- /*
68
- * returns number of trailing zero of m-bit
69
- */
70
- static int ntz_m(long *y, int pb){
71
- int i = 0;
72
- int n;
73
- long x;
62
+ struct Query{
63
+ int len;
64
+ int edge_len;
74
65
 
75
- n = 1;
66
+ long * type;
67
+ int ** ptr;
68
+ int * num;
69
+ int * idx;
70
+ };
76
71
 
77
- while(i < pb && y[i] == 0){
78
- n += 32;
79
- i++;
80
- }
72
+ struct Target{
73
+ int n_bits;
74
+ int n_bytes;
81
75
 
82
- x = y[i];
76
+ int max_length;
83
77
 
84
- if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
85
- if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
86
- if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
87
- if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
88
- return n - (x & 1);
89
- }
78
+ long * mat;
79
+ long * typ;
80
+ };
81
+
82
+ struct State{
83
+ int height;
84
+ int width;
85
+ int n_bytes;
86
+
87
+ int max_length;
88
+ int length;
89
+ long * mat;
90
+ int depth;
91
+
92
+ long * res;
93
+ int res_counter;
94
+ int res_max_len;
95
+ };
96
+
97
+ struct Record{
98
+ int n_bits;
99
+ int n_bytes;
100
+ int mat_pos;
101
+ int information;
102
+ };
90
103
 
91
- static int ntz(long x){
92
- int n;
104
+ query_dump(struct Query * query){
105
+ int i, j;
93
106
 
94
- if (x == 0) return (32);
95
- n = 1;
96
- if((x & 0x0000FFFF) == 0) {n = n + 16 ; x = x >> 16;}
97
- if((x & 0x000000FF) == 0) {n = n + 8 ; x = x >> 8;}
98
- if((x & 0x0000000F) == 0) {n = n + 4 ; x = x >> 4;}
99
- if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
100
- return n - (x & 1);
107
+ for(i = 0 ; i < query->len ; i++){
108
+ for(j = 0 ; j < query->num[i] ; j++){
109
+ printf("query->ptr[%d][%d] = %d\n", i, j, query->ptr[i][j]);
110
+ }
111
+ }
101
112
  }
102
113
 
103
- static int ntz_n_words(long * x, int n_words){
104
- int i;
105
- int words = 0;
106
- for(i = 0 ; x[i] == 0 && i < n_words ; i++){
107
- words += 32;
114
+ static void
115
+ target_free_db(struct Target * target)
116
+ {
117
+ free(target->mat);
118
+ target->mat = NULL;
119
+ free(target->typ);
120
+ target->typ = NULL;
121
+ }
122
+
123
+ static void
124
+ target_setup_db(struct Target * target, struct Record * record)
125
+ {
126
+ target->n_bits = record->n_bits;
127
+ target->n_bytes = record->n_bytes;
128
+ if(target->max_length < (record->n_bits * record->n_bytes)){
129
+ if(target->max_length != 0){ target_free_db(target); }
130
+
131
+ target->mat = talloc(sizeof(long) * record->n_bits * record->n_bytes);
132
+ target->typ = talloc(sizeof(long) * record->n_bits);
133
+ target->max_length = record->n_bits * record->n_bytes;
108
134
  }
109
- return ntz(x[i]) + words;
110
135
  }
111
136
 
112
- long bit_mask[32] = {
113
- 0x1, 0x2, 0x4, 0x8,
114
- 0x10, 0x20, 0x40, 0x80,
115
- 0x100, 0x200, 0x400, 0x800,
116
- 0x1000, 0x2000, 0x4000, 0x8000,
117
- 0x10000, 0x20000, 0x40000, 0x80000,
118
- 0x100000, 0x200000, 0x400000, 0x800000,
119
- 0x1000000, 0x2000000, 0x4000000, 0x8000000,
120
- 0x10000000, 0x20000000, 0x40000000, 0x80000000,
121
- };
137
+ static void
138
+ state_push_result(struct State * state)
139
+ {
140
+ if(state->res_max_len < state->res_counter){
141
+ state->res_max_len = state->res_max_len * 2;
142
+ state->res = (long *) trealloc(state->res, state->res_max_len);
143
+ }
144
+ memcpy(state->res + state->res_counter * state->length * sizeof(long),
145
+ state->mat,
146
+ state->height * state->n_bytes * sizeof(long));
147
+ state->res_counter++;
148
+ }
122
149
 
123
- long reverse_bit[32] = {
124
- 0xfffffffe,
125
- 0xfffffffd,
126
- 0xfffffffb,
127
- 0xfffffff7,
128
- 0xffffffef,
129
- 0xffffffdf,
130
- 0xffffffbf,
131
- 0xffffff7f,
132
- 0xfffffeff,
133
- 0xfffffdff,
134
- 0xfffffbff,
135
- 0xfffff7ff,
136
- 0xffffefff,
137
- 0xffffdfff,
138
- 0xffffbfff,
139
- 0xffff7fff,
140
- 0xfffeffff,
141
- 0xfffdffff,
142
- 0xfffbffff,
143
- 0xfff7ffff,
144
- 0xffefffff,
145
- 0xffdfffff,
146
- 0xffbfffff,
147
- 0xff7fffff,
148
- 0xfeffffff,
149
- 0xfdffffff,
150
- 0xfbffffff,
151
- 0xf7ffffff,
152
- 0xefffffff,
153
- 0xdfffffff,
154
- 0xbfffffff,
155
- 0x7fffffff,
156
- };
150
+ static VALUE
151
+ state_get_result(struct State * state)
152
+ {
153
+ VALUE result_array;
154
+ VALUE tmp;
155
+ int i, j;
156
+ int counter;
157
+
158
+ result_array = rb_ary_new();
159
+
160
+ for(i = 0 ; i < state->res_counter ; i++){
161
+ tmp = rb_ary_new();
162
+ counter = i * state->n_bytes * state->height * sizeof(long);
163
+ for(j = 0 ; j < state->height ; j++){
164
+ rb_ary_push(tmp,
165
+ INT2FIX(m_ntz(state->res + counter + j * state->n_bytes,
166
+ state->n_bytes)));
167
+ }
168
+ rb_ary_push(result_array, tmp);
169
+ }
170
+ return result_array;
171
+ }
157
172
 
158
- //int matchN(ADJACENCY *adj_ptr, long *b, long *m, int pa, int pb)
159
- static int matchN(const int * num_adj, long ** point, long *b, long *m, int pa, int pb)
173
+ static void
174
+ state_free(struct State * state)
160
175
  {
161
- long * mm;// current matrix
162
- long f[1000];//which columns has been used at an intermediate state of computing
163
- long h[100];// pb < 100 * 32
176
+ free(state->mat);
177
+ free(state->res);
178
+ state->mat = NULL;
179
+ }
164
180
 
165
- int d;// depth for matrix
166
- int k;// width for matrix
167
- int dd;// depth of matrix in refinement step
168
- int kk;// width of matrix in refinement step
181
+ static void
182
+ state_allocate(struct State * state, struct Query * query, struct Target * target)
183
+ {
184
+ int i;
169
185
 
170
- int i, j;//temp
171
- long l;// temp
186
+ state->height = query->len;
187
+ state->width = target->n_bits;
188
+ state->n_bytes = target->n_bytes;
189
+ state->res_counter = 0;
172
190
 
173
- short vflag;//valid check flag
174
- int n_words;// number of words needed for storing 'pb' bits.
175
- long refine_mm;// pointer for mm(match matrix) used in refinment step.
191
+ if(state->max_length < query->len * target->n_bytes){
176
192
 
177
- d = k = 0;
178
- // start back track
179
- for(i = 0 ; i < (pb / 32 + 1) ; i++)
180
- h[i] = 0;
181
- for(i = 0 ; i < 10 ; i++)
182
- f[i] = 0;
193
+ if(state->max_length != 0){
194
+ printf("state->free called max_length : %d\n", state->max_length);
195
+ state_free(state);
196
+ }
183
197
 
184
- n_words = (pb - 1) / (sizeof(int) * 8) + 1;
198
+ state->mat = (long *)talloc((query->len + 2) *// Depth
199
+ target->n_bytes * // Width
200
+ state->height * // Height
201
+ sizeof(long)); // sizeof(long)
202
+
203
+ state->res_max_len = (query->len + 2) *// Depth
204
+ target->n_bytes * // Width
205
+ state->height * // Height
206
+ sizeof(long) * 100;
207
+ state->res = (long *)talloc(state->res_max_len); // sizeof(long)
208
+ state->max_length = query->len * target->n_bytes;
209
+ }
210
+ state->length = query->len * target->n_bytes;
211
+ state->depth = -1;
185
212
 
186
- /* show(b, pb, pb); */
187
- /* show(m, pa, pb); */
213
+ for(i = 0 ; i < state->length ; i++){ state->mat[i] = 0;}
214
+ }
188
215
 
189
- if( d == 0 && k == 0){
190
- k = ntz_n_words(m, n_words);
191
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
192
- }
193
- while(k <= pb && d <= pa){
194
- /* printf("d : %3d k : %3d n_words : %3d\n", d, k, n_words); */
195
- if(d < 0){
196
- printf("d < 0 return \n");
197
- return FAIL;
216
+ static void
217
+ state_setup(struct State * state, struct Query * query, struct Target * target)
218
+ {
219
+ int i, j;
220
+ for(i = 0 ; i < query->len ; i++){
221
+ for(j = 0 ; j < target->n_bits ; j++){
222
+ if (query->type[i] == target->typ[j]){
223
+ BITON(state->mat, i, j, target->n_bytes);
224
+ }
198
225
  }
226
+ }
227
+ }
199
228
 
200
- // Idea for optimization :
201
- // instead of using following equation, just (mm = mm + len) and (mm = mm - len).
202
- mm = m + pa * (d + 1) * n_words;
203
- /* printf("pa : %d d : %d k : %d n_words : %d hint : %d\n", pa, d, k, n_words, pa * (d + 1) * n_words); */
204
-
205
- //printf("ntz : %d\n", ntz(mm));
206
- //k = ntz(mm + d);
207
- // set (k, d) bit '1', clear k-column and d-row '0'
208
- /* printf("k : %d d: %d\n", k, d); */
209
- for(j = 0 ; j < n_words ; j++){
210
- if(j == (k / 32)){
211
- for(i = 0 ; i < pa ; i++){
212
- mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
213
- }
214
- mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
215
- }else{
216
- for(i = 0 ; i < pa ; i++){
217
- mm[i * n_words + j] = mm[(i - pa) * n_words + j];
218
- }
219
- mm[d * n_words + j] = ZERO;
229
+ static void
230
+ state_setup_block(struct State * state)
231
+ {
232
+ int i, j;
233
+ for(i = 0 ; i < state->height ; i++){
234
+ for(j = 0 ; j < state->width ; j++){
235
+ if (rb_yield_values(2, INT2FIX(i), INT2FIX(j))){
236
+ BITON(state->mat, i, j, state->n_bytes);
220
237
  }
221
238
  }
222
- // BEGIN
223
- /* show(mm, pa, pb); */
224
- // END
225
-
226
- // Refinement step
227
- // Hot Spot!!
228
- dd = kk = 0;
229
- /* printf("before refinement step \n"); */
230
- /* show(mm, pa, pb); */
231
-
232
- while(dd != pa){
233
- while(kk != pb){
234
- //Idea for optimization :
235
- //refine_mm should not updated 1 / 32 times.mm[dd + ((kk - 1) / 32)]
236
-
237
- //Idea for optimization :
238
- // when mm is sparse there may be better algorithm
239
- // for searching '1' bit.
240
- if(mm[dd * n_words + ((kk - 1) / 32)] & bit_mask[kk - ((kk - 1) / 32) * 32]){
241
- // Following loop can be flattened
242
- for(i = 0 ; i < num_adj[dd] ; i++){
243
- l = 0;
244
- for(j = 0 ; j < n_words ; j++){
245
- l |= (b[kk * n_words + j] & mm[point[dd][i] * n_words + j]);
239
+ }
240
+ }
241
+
242
+ static void
243
+ state_push(struct State * state)
244
+ {
245
+ memmove(state->mat + state->length,
246
+ state->mat,
247
+ state->length * sizeof(long) );
248
+ state->mat += state->length;
249
+ state->depth++;
250
+ }
251
+
252
+ static void
253
+ state_pop(struct State * state)
254
+ {
255
+ state->mat -= state->length;
256
+ state->depth--;
257
+ }
258
+
259
+ inline static long
260
+ has_bit(long * mat, int height, int width, int n_bytes){
261
+ return (mat[height * n_bytes + width / ARCH] & (1 << (width % ARCH)));
262
+ }
263
+
264
+ /*
265
+ * Hot spot
266
+ */
267
+ inline static void
268
+ refine(struct State * state, struct Query * query, struct Target * target){
269
+ int i, j, k, l, m, bit_removed;
270
+ bit_removed = 1;
271
+ while(bit_removed){
272
+ bit_removed = 0;// false
273
+ for(i = 0 ; i < query->len ; i++){
274
+ for(j = 0 ; j < target->n_bits ; j++){
275
+ if(has_bit(state->mat, i, j, target->n_bytes)){
276
+ for(k = 0 ; k < query->num[i] ; k++){
277
+ m = 0;
278
+ for(l = 0 ; l < target->n_bytes ; l++){
279
+ if((state->mat[query->ptr[i][k] * target->n_bytes + l] &
280
+ target->mat[j * target->n_bytes + l]) != 0){
281
+ m++;
282
+ }
246
283
  }
247
- if(l == 0){
248
- mm[dd * n_words + (kk / 32)] &= reverse_bit[kk - (kk / 32) * 32];//remove bit
249
- /* break;//quit for loop */
284
+ if(m == 0){
285
+ BITOFF(state->mat, i, j, target->n_bytes);
286
+ bit_removed = 1;
250
287
  }
251
288
  }
252
-
253
289
  }
254
- kk++;
255
290
  }
256
- // Idea for optimization
257
- // every 32 bit is tested here.
258
- kk = 0;
259
- dd++;
260
291
  }
261
- /* show(mm, pa, pb); */
262
-
263
- //Checking whether match matrices are valid.
264
- // Subgraph isomorphism can be checked here before reaching d == pa.
265
- vflag = SUCCESS;
266
- for(i = 0 ; i < pa ; i++){
267
- l = 0;
268
- for(j = 0 ; j < n_words ; j++){
269
- l |= mm[i * n_words + j];
270
- }
271
- if(l == 0){
272
- vflag = FAIL;
273
- break;
292
+ }
293
+ }
294
+
295
+ static void
296
+ state_clear_bits(long * l, int h, int w, int n_bytes, int height){
297
+ int i;
298
+ for(i = 0 ; i < n_bytes ; i++){ l[i + h * n_bytes] = 0; }
299
+ for(i = 0 ; i < height ; i++){ BITOFF(l, i, w, n_bytes); }
300
+ BITON(l, h, w, n_bytes);
301
+ }
302
+
303
+ #define TRUE 1
304
+ #define FALSE 0
305
+
306
+ inline static int
307
+ state_is_valid(struct State * state){
308
+ int i, j, n_bytes, flag;
309
+ // n_bytes = NBYTES(state->length);
310
+ for(i = 0 ; i < state->height ; i++){
311
+ flag = 0;
312
+ for(j = 0 ; j < state->n_bytes ; j++){
313
+ if(state->mat[i * state->n_bytes + j] != 0){
314
+ flag++;
274
315
  }
275
316
  }
317
+ if(flag == 0)
318
+ return FALSE;
319
+ }
320
+ return TRUE;
321
+ }
276
322
 
277
- if(vflag){// Success
278
- f[d] = k;
279
- k = 0;
280
- while(h[k / 32] & bit_mask[k - (k / 32) * 32])
281
- k++;
282
- d++;
283
- if(d == pa){
284
- /* show(mm, pa, pb); */
285
- //printf("FOUND! d : %d\n", d);
286
- return SUCCESS;
287
- }
288
- else{
289
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
290
- }
291
- }else{//Failed
292
- h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
293
- k++;
294
- //printf("d : %d k : %d\n", d, k);
295
- while((h[k / 32] & bit_mask[k - (k / 32) * 32] ||
296
- (m[d * n_words + (k / 32)] & bit_mask[k - (k / 32) * 32] ) == 0) &&
297
- k < pb)
298
- k++;
299
- /* printf("d : %d k : %d\n", d, k); */
300
- while(k > pb){
301
- if(d == 0){
302
- return FAIL;
323
+ static void
324
+ search_by_ullmann(struct State * state, struct Query * query, struct Target * target){
325
+ int k;
326
+ // Idea for optimization
327
+ //show(state->mat, query->len, target->n_bits);
328
+ if(state->depth == state->height - 1){
329
+ //printf("FOUND!\n");
330
+ state_push_result(state);
331
+ //show(state->mat, query->len, target->n_bits);
332
+ }else{
333
+ for(k = 0 ; k < target->n_bits ; k++){
334
+ if(has_bit(state->mat,
335
+ state->depth + 1,
336
+ k,
337
+ target->n_bytes)){
338
+ state_push(state);
339
+ state_clear_bits(state->mat, state->depth, k, target->n_bytes, query->len);
340
+ //show(state->mat, query->len, target->n_bits);
341
+ refine(state, query, target);
342
+ //show(state->mat, query->len, target->n_bits);
343
+ if(state_is_valid(state) == TRUE){
344
+ //show(state->mat, query->len, target->n_bits);
345
+ search_by_ullmann(state, query, target);
303
346
  }
304
- d--;
305
- k = f[d];
306
- h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
307
- k++;
308
- while(h[k / 32] & bit_mask[k - (k / 32) * 32])
309
- k++;
347
+ state_pop(state);
310
348
  }
311
- h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
312
349
  }
313
350
  }
314
- //printf("d : %d k : %d FAIL!\n", d, k);
315
- return FAIL;
316
351
  }
317
352
 
318
- static void set_adjacency(int * num_adj, long ** point, long * adj, VALUE ret){
319
- int i, j, n_words;
320
- int off_set = 0;
353
+ static void
354
+ db_load(struct CompoundDB * db, struct Query * query){
355
+
356
+ int new_n_bits;
357
+ int new_n_bytes;
358
+ int mat_ptr;
359
+
360
+ struct Target target;
361
+ struct State state;
362
+ struct Record record;
363
+
364
+ int i, j;
321
365
 
322
- n_words = (RARRAY(ret)->len - 1) / (sizeof(int) * 8) + 1;
366
+ target.n_bits = 0;
367
+ target.n_bytes = 0;
368
+ target.max_length = 0;
369
+ state.max_length = 0;
323
370
 
324
- for(i = 0 ; i < RARRAY(ret)->len ; i++){
325
- num_adj[i] = FIX2INT(rb_funcall(RARRAY(ret)->ptr[i], rb_intern("length"), 0));
326
- point[i] = adj + off_set;
327
- for(j = 0 ; j < RARRAY(RARRAY(ret)->ptr[i])->len ; j++){
328
- adj[off_set++] = FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]);
329
- //printf(" %d ", FIX2INT(RARRAY(RARRAY(ret)->ptr[i])->ptr[j]));
371
+ for(;;){
372
+ if(feof(db->idx) || feof(db->mat) || feof(db->mat)){
373
+ printf("Database broken!\n");
374
+ return;
375
+ }
376
+
377
+ fread(& record, sizeof(struct Record), 1, db->idx);
378
+ if(record.n_bits == -1){
379
+ return;
380
+ }
381
+ target_setup_db(& target, & record);
382
+ if(record.information != -1){
383
+
384
+ fread(target.mat, sizeof(long), target.n_bits * target.n_bytes, db->mat);
385
+ fread(target.typ, sizeof(long), target.n_bits, db->typ);
386
+
387
+ state_allocate(& state, query, & target);
388
+ state_setup(& state, query, & target);
389
+ //show(state.mat, query->len, target.n_bits);
390
+ search_by_ullmann(& state, query, & target);
391
+ }else{
392
+ fread(target.typ, sizeof(long), target.n_bytes, db->typ);
393
+ printf("atom_number : %d\n", target.typ[0]);
330
394
  }
331
- //printf("\n");
332
395
  }
396
+ target_free_db(& target);
397
+ state_free(& state);
333
398
  }
334
399
 
335
- static VALUE subcomp_match_by_ullmann(VALUE self, VALUE a_matrix, VALUE pa, VALUE other_adj, VALUE pb, VALUE match){
336
- // variables for adjacency list of graph A
337
- int num_adj[1000];
338
- long * point[1000];
339
- long adj[3000];//adjacency list
400
+ static void
401
+ query_setup(VALUE mol, struct Query * query){
402
+ VALUE atom_type_str;
403
+ VALUE adj_index;
404
+ VALUE edges;
340
405
 
341
- // match matrix; = pa * (n_words * pa)
342
- long * mm;//[800000];
343
- long * m;
406
+ int i, j, k;
344
407
 
345
- //temporary variables
346
- int i;
347
- int result;
348
- VALUE mapping;
408
+ // allocating and setting atom type
409
+ atom_type_str = rb_funcall(mol, rb_intern("typ_str"), 0);
410
+ Check_Type(atom_type_str, T_STRING);
349
411
 
350
- int n_pb, n_pa;
351
- int n_words;
352
- int sizeof_mm;
412
+ query->len = RSTRING(atom_type_str)->len / sizeof(long);
413
+ query->type = (long *)talloc(query->len * sizeof(long));
414
+ memcpy(query->type, RSTRING(atom_type_str)->ptr, sizeof(long) * query->len);
353
415
 
354
- n_pb = NUM2INT(pb);
355
- n_pa = NUM2INT(pa);
356
-
357
- if(n_pb > n_pa){
358
- return Qfalse;
359
- }
416
+ // allocatting and setting index
417
+ adj_index = rb_funcall(mol, rb_intern("adjacent_index"), 0);
418
+ Check_Type(adj_index, T_ARRAY);
360
419
 
361
- sizeof_mm = n_pa * (n_pb + 1) * n_words;
420
+ edges = rb_funcall(mol, rb_intern("edges"), 0);
421
+ Check_Type(edges, T_ARRAY);
362
422
 
363
- n_words = (n_pa - 1) / (sizeof(int) * 8) + 1;
423
+ query->edge_len = RARRAY(edges)->len;
364
424
 
365
- mm = (long * )malloc(sizeof(long) * 800000);
366
- if(RSTRING(match)->len > 800000 * sizeof(long))
367
- rb_raise(rb_eArgError, "Length of match matrix too short! %d", sizeof(mm));
368
-
369
- memcpy(mm, (long *)RSTRING(match)->ptr, RSTRING(match)->len); // BUG!!
425
+ query->ptr = (int **) talloc(query->len * sizeof(int **) );
426
+ query->num = (int * ) talloc(query->len * sizeof(int * ) );
427
+ query->idx = (int * ) talloc(query->edge_len * sizeof(int * ) * 2 );
370
428
 
371
- Check_Type(a_matrix, T_STRING);
429
+ k = 0;
430
+ for(i = 0 ; i < query->len ; i++){
431
+ Check_Type(rb_ary_entry(adj_index, i), T_ARRAY);
432
+ query->num[i] = RARRAY(rb_ary_entry(adj_index, i))->len;
433
+ query->ptr[i] = query->idx + k;
434
+ for(j = 0 ; j < query->num[i] ; j++){
435
+ Check_Type(rb_ary_entry(rb_ary_entry(adj_index, i), j), T_FIXNUM);
436
+ query->idx[k] = FIX2INT(rb_ary_entry(rb_ary_entry(adj_index, i), j));
437
+ k++;
438
+ }
439
+ }
372
440
 
373
- set_adjacency(num_adj, point, adj, other_adj);
441
+ }
374
442
 
375
- //show(mm, n_pa, n_pb);
376
- //show((long *)RSTRING(a_matrix)->ptr, n_pa, n_pa);
443
+ static void
444
+ query_free(struct Query * query){
445
+ free(query->type);
446
+ free(query->ptr);
447
+ free(query->num);
448
+ free(query->idx);
449
+
450
+ query->type = NULL;
451
+ query->ptr = NULL;
452
+ query->num = NULL;
453
+ query->idx = NULL;
454
+ }
377
455
 
378
- result = matchN(num_adj, point, (long *)RSTRING(a_matrix)->ptr, mm, n_pb, n_pa);
456
+ static VALUE
457
+ db_search(VALUE self, VALUE database_name, VALUE q_mol, VALUE block)
458
+ {
459
+ char * filename;
460
+ struct CompoundDB db;
461
+ struct Query query;
379
462
 
380
- if(result == 1){//?
381
- mapping = rb_ary_new();
382
- //printf("n_words : %d n_pa : %d n_pb : %d n_words * n_pa * n_pa : %d", n_words, n_pa, n_pb, n_words * n_pa * n_pa);
383
- //show(mm + n_words * n_pb * n_pb, n_pb, n_pa);
384
-
385
- for(i = 0 ; i < n_pb ; i++){
386
- rb_ary_push(mapping, INT2FIX(ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa)));
387
- }
388
- return mapping;
463
+ filename = StringValuePtr(database_name);
464
+
465
+ if(strlen(filename) > 40){
466
+ rb_raise(rb_eException, "length of database name must less than 40!");
389
467
  }
390
- return Qfalse;
468
+
469
+ query_setup(q_mol, & query);
470
+
471
+ db.mat = db_file_open(filename, ".mat");
472
+ db.idx = db_file_open(filename, ".idx");
473
+ db.typ = db_file_open(filename, ".typ");
474
+
475
+ db_load(& db, & query);
476
+
477
+ query_free(& query);
478
+
479
+ fclose(db.mat);
480
+ fclose(db.idx);
481
+ fclose(db.typ);
391
482
  }
392
483
 
393
- // DataBase for substructure search
484
+ static void
485
+ target_setup(VALUE t_mol, struct Target * target){
486
+ VALUE bit_mat;
487
+ VALUE bit_str;
488
+ VALUE atom_types;
394
489
 
395
- struct dbmdata {
396
- int di_size;
397
- };
490
+ int i;
491
+
492
+ atom_types = rb_funcall(t_mol, rb_intern("typ_str"), 0);
493
+ Check_Type(atom_types, T_STRING);
494
+
495
+ target->n_bits = RSTRING(atom_types)->len / sizeof(long);
496
+ target->typ = (long *)talloc(target->n_bits * sizeof(long));
497
+ memcpy(target->typ, RSTRING(atom_types)->ptr, target->n_bits * sizeof(long));
498
+
499
+ /*
500
+ * Set up adjacency matrix
501
+ */
502
+ bit_mat = rb_funcall(t_mol, rb_intern("bit_mat"), 0);
503
+ bit_str = rb_funcall(bit_mat, rb_intern("bit_str"), 0);
504
+
505
+ target->n_bytes = NBYTES(target->n_bits);
398
506
 
399
- static VALUE sdb_s_search(VALUE dbname){
400
- rb_p(dbname);
507
+ target->mat = (long *)talloc(target->n_bytes * target->n_bits * sizeof(long));
508
+ memcpy(target->mat, RSTRING(bit_str)->ptr, RSTRING(bit_str)->len);
401
509
  }
402
510
 
403
- Init_subcomp(){
404
- VALUE subcomp_cGraph;
405
- VALUE subcomp_cSubGraphDB;
511
+ static void
512
+ target_free(struct Target * target){
513
+ free(target->typ);
514
+ free(target->mat);
515
+ }
516
+
517
+ static VALUE
518
+ mol_by_mol(VALUE self, VALUE q_mol, VALUE t_mol)
519
+ {
520
+ struct Query query;
521
+ struct Target target;
522
+ struct State state;
523
+ VALUE result;
524
+
525
+ target.max_length = 0;
526
+ state.max_length = 0;
527
+
528
+ query_setup( q_mol, & query );
529
+ target_setup( t_mol, & target );
406
530
 
407
- subcomp_cGraph = rb_define_module("Graph");
408
- rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
531
+ state_allocate(& state, & query, & target);
409
532
 
410
- subcomp_cSubGraphDB = rb_define_class_under(subcomp_cGraph, "SubGraphDB", rb_cObject);
533
+ if(rb_block_given_p() == Qtrue){
534
+ state_setup_block(& state);
535
+ }
536
+ else{
537
+ state_setup(& state, & query, & target);
538
+ }
539
+
540
+ search_by_ullmann(& state, & query, & target);
541
+ result = state_get_result(& state);
542
+
543
+ query_free(& query);
544
+ target_free(& target);
545
+ state_free(& state);
546
+
547
+ return result;
548
+ }
411
549
 
412
- rb_define_method(subcomp_cSubGraphDB, "open_for_search", sdb_s_search, 0);
550
+ void Init_subcomp(){
551
+ VALUE subcomp_mChem;
413
552
 
414
- rb_define_singleton_method(subcomp_cSubGraphDB, "show", subcomp_show, 3);
415
- rb_define_singleton_method(subcomp_cSubGraphDB, "match", subcomp_match_by_ullmann, 5);
553
+ subcomp_mChem = rb_define_module("Chem");
554
+ rb_define_singleton_method(subcomp_mChem, "match_by_ullmann", mol_by_mol, 2);
555
+ rb_define_singleton_method(subcomp_mChem, "db_search", db_search, 2);
556
+ //define_bitdb_method();
416
557
  }