chemruby 0.9.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
data/README
CHANGED
@@ -52,7 +52,7 @@ For testing and developing ChemRuby:
|
|
52
52
|
|
53
53
|
== INSTALL
|
54
54
|
|
55
|
-
In the chemruby source directory (such as chemruby-x.x.x/), run
|
55
|
+
In the chemruby source directory (such as chemruby-x.x.x/), run setup.rb
|
56
56
|
as follows:
|
57
57
|
|
58
58
|
% ruby setup.rb config
|
@@ -109,7 +109,7 @@ Note that, setup.rb included in the ChemRuby package comes from
|
|
109
109
|
|
110
110
|
License of This README file can be also distributed under the Ruby's license.
|
111
111
|
|
112
|
-
Copyright (C) 2006 TANAKA Nobuya <
|
112
|
+
Copyright (C) 2006 TANAKA Nobuya <t@chemruby.org>
|
113
113
|
KATAYAMA Toshiaki <k@bioruby.org>
|
114
114
|
|
115
115
|
== CONTACT
|
data/Rakefile
CHANGED
@@ -8,12 +8,15 @@
|
|
8
8
|
|
9
9
|
require 'rake/clean'
|
10
10
|
require 'rake/testtask'
|
11
|
-
|
11
|
+
|
12
|
+
require "rake/gempackagetask"
|
13
|
+
require 'rubygems'
|
12
14
|
|
13
15
|
task :default => [:help]
|
14
16
|
|
15
|
-
PKG_VERSION = "0.9.3"
|
16
17
|
PKG_BUILD = "RC1"
|
18
|
+
PKG_VERSION = "1.1.9"
|
19
|
+
|
17
20
|
|
18
21
|
PKG_FILES = FileList[
|
19
22
|
"Rakefile", "README", #"ChangeLog", "Releases", "TODO",
|
@@ -26,6 +29,7 @@ PKG_FILES = FileList[
|
|
26
29
|
"lib/**/*.rb",
|
27
30
|
"lib/**/*.ry",
|
28
31
|
"test/**/*",
|
32
|
+
"temp/",
|
29
33
|
"sample/**/*.rb",
|
30
34
|
"sample/**/*.mol",
|
31
35
|
"ext/**/*.h",
|
@@ -37,76 +41,30 @@ PKG_FILES = FileList[
|
|
37
41
|
# "test/**/*"
|
38
42
|
]
|
39
43
|
|
40
|
-
task :help do |t|
|
41
|
-
puts <<EOL
|
42
|
-
|
43
|
-
ChemRuby #{PKG_VERSION}
|
44
|
-
|
45
|
-
To install ChemRuby, you need at least
|
46
|
-
|
47
|
-
* ruby-1.8.2 (or later)
|
48
|
-
* Ruby header files (included in original Ruby)
|
49
|
-
* C language compilers (such as gcc)
|
50
|
-
|
51
|
-
If the following modules are installed, ChemRuby will use it.
|
52
|
-
You can install them later.
|
53
|
-
|
54
|
-
* RMagick ( You will find how to install them in http://www.chemruby.org)
|
55
|
-
|
56
|
-
== Compiling and Installing
|
57
|
-
|
58
|
-
% rake compile
|
59
|
-
% sudo rake install
|
60
|
-
|
61
|
-
or just
|
62
|
-
|
63
|
-
% sudo ruby setup.rb
|
64
|
-
|
65
|
-
== Compiling RDOC
|
66
|
-
|
67
|
-
% rake doc
|
68
|
-
|
69
|
-
== Test
|
70
|
-
|
71
|
-
% rake test
|
72
|
-
|
73
|
-
You will need RMagick and other libraries to pass all the tests.
|
74
|
-
|
75
|
-
EOL
|
76
|
-
|
77
|
-
end
|
78
|
-
|
79
44
|
task :doc do |t|
|
80
45
|
system "rdoc --main README ./lib README"
|
81
46
|
end
|
82
47
|
|
83
48
|
|
84
|
-
task :dev => [:
|
49
|
+
task :dev => [:compile]
|
85
50
|
Rake::TestTask.new(:dev) do |t|
|
86
51
|
t.libs << File.join('ext')
|
87
52
|
t.libs << File.join('lib')
|
88
|
-
t.
|
89
|
-
t.libs << File.join('dev/ext')
|
90
|
-
# cd 'dev/ext/chem/db/inchi/' do
|
91
|
-
# ruby %{extconf.rb}
|
92
|
-
# sh "make"
|
93
|
-
# end
|
94
|
-
t.test_files = FileList['dev/test/test*.rb']
|
53
|
+
t.test_files = FileList['test/ts_current.rb']
|
95
54
|
end
|
96
55
|
|
97
56
|
task :test => [:compile]
|
98
57
|
Rake::TestTask.new(:test) do |t|
|
99
58
|
t.libs << File.join('ext')
|
100
59
|
t.libs << File.join('lib')
|
101
|
-
t.test_files = FileList['test/
|
60
|
+
t.test_files = FileList['test/ts_main.rb']
|
102
61
|
end
|
103
62
|
|
104
63
|
task :light => [:compile]
|
105
64
|
Rake::TestTask.new(:light) do |t|
|
106
65
|
t.libs << File.join('ext')
|
107
66
|
t.libs << File.join('lib')
|
108
|
-
t.test_files = FileList['test/
|
109
|
-
#'test/test_kegg.rb'# 'test/test_kcf_glycan.rb' #FileList['test/test_canonical_smiles.rb']
|
67
|
+
t.test_files = FileList['test/tc_sssr.rb']
|
110
68
|
end
|
111
69
|
|
112
70
|
task :rm do
|
@@ -179,17 +137,63 @@ end
|
|
179
137
|
desc "Compiling library"
|
180
138
|
task :compile => ['lib/chem/db/smiles/smiparser.rb', 'lib/chem/db/iupac/iuparser.rb', 'lib/chem/db/linucs/linparser.rb', "ext/subcomp.#{Config::CONFIG["DLEXT"]}"]
|
181
139
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
140
|
+
begin
|
141
|
+
require 'rake/gempackagetask'
|
142
|
+
|
143
|
+
spec = Gem::Specification.new do |s|
|
144
|
+
s.name = 'chemruby'
|
145
|
+
s.version = PKG_VERSION
|
146
|
+
s.require_path = 'lib'
|
147
|
+
s.autorequire = 'chem'
|
148
|
+
s.files = PKG_FILES
|
149
|
+
s.extensions << 'ext/extconf.rb'
|
150
|
+
s.summary = "A framework program for cheminformatics"
|
151
|
+
end
|
152
|
+
|
153
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
154
|
+
pkg.need_tar = true
|
155
|
+
pkg.need_tar_gz = true
|
156
|
+
pkg.package_files += PKG_FILES
|
157
|
+
end
|
158
|
+
rescue
|
159
|
+
puts 'Install RubyGems to make gem'
|
190
160
|
end
|
191
161
|
|
192
|
-
|
193
|
-
|
194
|
-
|
162
|
+
task :help do |t|
|
163
|
+
puts <<EOL
|
164
|
+
|
165
|
+
ChemRuby #{PKG_VERSION}
|
166
|
+
|
167
|
+
To install ChemRuby, you need at least
|
168
|
+
|
169
|
+
* ruby-1.8.2 (or later)
|
170
|
+
* Ruby header files (included in original Ruby)
|
171
|
+
* C language compilers (such as gcc)
|
172
|
+
|
173
|
+
If the following modules are installed, ChemRuby will use it.
|
174
|
+
You can install them later.
|
175
|
+
|
176
|
+
* RMagick ( You will find how to install them in http://www.chemruby.org)
|
177
|
+
|
178
|
+
== Compiling and Installing
|
179
|
+
|
180
|
+
% rake compile
|
181
|
+
% sudo rake install
|
182
|
+
|
183
|
+
or just
|
184
|
+
|
185
|
+
% sudo ruby setup.rb
|
186
|
+
|
187
|
+
== Compiling RDOC
|
188
|
+
|
189
|
+
% rake doc
|
190
|
+
|
191
|
+
== Test
|
192
|
+
|
193
|
+
% rake test
|
194
|
+
|
195
|
+
You will need RMagick and other libraries to pass all the tests.
|
196
|
+
|
197
|
+
EOL
|
198
|
+
|
195
199
|
end
|
data/ext/extconf.rb
CHANGED
data/ext/subcomp.c
CHANGED
@@ -4,413 +4,554 @@
|
|
4
4
|
|
5
5
|
$Author: nobyt $
|
6
6
|
|
7
|
-
Copyright (C) 2004-
|
7
|
+
Copyright (C) 2004-2007 Nobuya Tanaka
|
8
8
|
|
9
9
|
**********************************************************************/
|
10
10
|
|
11
|
-
#define FULL 0xffffffff
|
12
|
-
#define ZERO 0x0
|
13
|
-
|
14
|
-
#define FAIL 0;
|
15
|
-
#define SUCCESS 1;
|
16
|
-
|
17
11
|
#include <ruby.h>
|
12
|
+
// #include "bitdb.h"
|
13
|
+
#include "utils.h"
|
18
14
|
|
19
15
|
static void
|
20
|
-
show(long *
|
21
|
-
|
22
|
-
int
|
23
|
-
|
24
|
-
int n_words;
|
25
|
-
|
26
|
-
n_words = (pb - 1) / (sizeof(int) * 8) + 1;
|
16
|
+
show(long * l, int h, int w){
|
17
|
+
int i, j;
|
18
|
+
int counter = 0;
|
19
|
+
int n_bytes;
|
27
20
|
|
28
|
-
|
21
|
+
n_bytes = NBYTES(w);
|
29
22
|
|
30
|
-
printf("
|
31
|
-
for(i = 0 ; i <
|
23
|
+
printf(" ");
|
24
|
+
for(i = 0 ; i < w ; i++){
|
32
25
|
printf("%d", i % 10);
|
33
26
|
}
|
34
27
|
printf("\n");
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
else
|
42
|
-
printf(".");
|
43
|
-
}
|
44
|
-
//printf(" ");
|
28
|
+
|
29
|
+
for(i = 0 ; i < h ; i++){
|
30
|
+
printf("%3d ", i);
|
31
|
+
for(j = 0 ; j < n_bytes ; j++){
|
32
|
+
dump_long(l[counter], (j == n_bytes - 1) ? ((w - 1) % ARCH + 1) : ARCH);
|
33
|
+
counter++;
|
45
34
|
}
|
46
35
|
printf("\n");
|
47
36
|
}
|
48
|
-
printf("\n");
|
49
37
|
}
|
50
38
|
|
51
|
-
|
52
|
-
*
|
53
|
-
* SubGraphDB.show -> print out adjacency matrix
|
54
|
-
*
|
55
|
-
* This function is mainly for debug.
|
56
|
-
*/
|
57
|
-
|
58
|
-
static VALUE
|
59
|
-
subcomp_show(VALUE self, VALUE str, VALUE pa, VALUE pb)
|
39
|
+
static FILE *
|
40
|
+
db_file_open(const char * filename, const char * extension)
|
60
41
|
{
|
61
|
-
|
62
|
-
|
63
|
-
|
42
|
+
FILE * fp;
|
43
|
+
char new_filename[50];
|
44
|
+
|
45
|
+
strncpy(new_filename, filename, sizeof(new_filename) - 5);
|
46
|
+
strncat(new_filename, extension, sizeof(new_filename) - strlen(extension) - 1);
|
47
|
+
|
48
|
+
fp = fopen(new_filename, "r");
|
49
|
+
|
50
|
+
if(fp == NULL){
|
51
|
+
rb_raise(rb_eException, "File can not open");
|
52
|
+
}
|
53
|
+
return fp;
|
64
54
|
}
|
65
55
|
|
56
|
+
struct CompoundDB{
|
57
|
+
FILE * mat;
|
58
|
+
FILE * idx;
|
59
|
+
FILE * typ;
|
60
|
+
};
|
66
61
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
static int ntz_m(long *y, int pb){
|
71
|
-
int i = 0;
|
72
|
-
int n;
|
73
|
-
long x;
|
62
|
+
struct Query{
|
63
|
+
int len;
|
64
|
+
int edge_len;
|
74
65
|
|
75
|
-
|
66
|
+
long * type;
|
67
|
+
int ** ptr;
|
68
|
+
int * num;
|
69
|
+
int * idx;
|
70
|
+
};
|
76
71
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
}
|
72
|
+
struct Target{
|
73
|
+
int n_bits;
|
74
|
+
int n_bytes;
|
81
75
|
|
82
|
-
|
76
|
+
int max_length;
|
83
77
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
78
|
+
long * mat;
|
79
|
+
long * typ;
|
80
|
+
};
|
81
|
+
|
82
|
+
struct State{
|
83
|
+
int height;
|
84
|
+
int width;
|
85
|
+
int n_bytes;
|
86
|
+
|
87
|
+
int max_length;
|
88
|
+
int length;
|
89
|
+
long * mat;
|
90
|
+
int depth;
|
91
|
+
|
92
|
+
long * res;
|
93
|
+
int res_counter;
|
94
|
+
int res_max_len;
|
95
|
+
};
|
96
|
+
|
97
|
+
struct Record{
|
98
|
+
int n_bits;
|
99
|
+
int n_bytes;
|
100
|
+
int mat_pos;
|
101
|
+
int information;
|
102
|
+
};
|
90
103
|
|
91
|
-
|
92
|
-
int
|
104
|
+
query_dump(struct Query * query){
|
105
|
+
int i, j;
|
93
106
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
if((x & 0x00000003) == 0) {n = n + 2 ; x = x >> 2;}
|
100
|
-
return n - (x & 1);
|
107
|
+
for(i = 0 ; i < query->len ; i++){
|
108
|
+
for(j = 0 ; j < query->num[i] ; j++){
|
109
|
+
printf("query->ptr[%d][%d] = %d\n", i, j, query->ptr[i][j]);
|
110
|
+
}
|
111
|
+
}
|
101
112
|
}
|
102
113
|
|
103
|
-
static
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
114
|
+
static void
|
115
|
+
target_free_db(struct Target * target)
|
116
|
+
{
|
117
|
+
free(target->mat);
|
118
|
+
target->mat = NULL;
|
119
|
+
free(target->typ);
|
120
|
+
target->typ = NULL;
|
121
|
+
}
|
122
|
+
|
123
|
+
static void
|
124
|
+
target_setup_db(struct Target * target, struct Record * record)
|
125
|
+
{
|
126
|
+
target->n_bits = record->n_bits;
|
127
|
+
target->n_bytes = record->n_bytes;
|
128
|
+
if(target->max_length < (record->n_bits * record->n_bytes)){
|
129
|
+
if(target->max_length != 0){ target_free_db(target); }
|
130
|
+
|
131
|
+
target->mat = talloc(sizeof(long) * record->n_bits * record->n_bytes);
|
132
|
+
target->typ = talloc(sizeof(long) * record->n_bits);
|
133
|
+
target->max_length = record->n_bits * record->n_bytes;
|
108
134
|
}
|
109
|
-
return ntz(x[i]) + words;
|
110
135
|
}
|
111
136
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
137
|
+
static void
|
138
|
+
state_push_result(struct State * state)
|
139
|
+
{
|
140
|
+
if(state->res_max_len < state->res_counter){
|
141
|
+
state->res_max_len = state->res_max_len * 2;
|
142
|
+
state->res = (long *) trealloc(state->res, state->res_max_len);
|
143
|
+
}
|
144
|
+
memcpy(state->res + state->res_counter * state->length * sizeof(long),
|
145
|
+
state->mat,
|
146
|
+
state->height * state->n_bytes * sizeof(long));
|
147
|
+
state->res_counter++;
|
148
|
+
}
|
122
149
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
0xffdfffff,
|
146
|
-
0xffbfffff,
|
147
|
-
0xff7fffff,
|
148
|
-
0xfeffffff,
|
149
|
-
0xfdffffff,
|
150
|
-
0xfbffffff,
|
151
|
-
0xf7ffffff,
|
152
|
-
0xefffffff,
|
153
|
-
0xdfffffff,
|
154
|
-
0xbfffffff,
|
155
|
-
0x7fffffff,
|
156
|
-
};
|
150
|
+
static VALUE
|
151
|
+
state_get_result(struct State * state)
|
152
|
+
{
|
153
|
+
VALUE result_array;
|
154
|
+
VALUE tmp;
|
155
|
+
int i, j;
|
156
|
+
int counter;
|
157
|
+
|
158
|
+
result_array = rb_ary_new();
|
159
|
+
|
160
|
+
for(i = 0 ; i < state->res_counter ; i++){
|
161
|
+
tmp = rb_ary_new();
|
162
|
+
counter = i * state->n_bytes * state->height * sizeof(long);
|
163
|
+
for(j = 0 ; j < state->height ; j++){
|
164
|
+
rb_ary_push(tmp,
|
165
|
+
INT2FIX(m_ntz(state->res + counter + j * state->n_bytes,
|
166
|
+
state->n_bytes)));
|
167
|
+
}
|
168
|
+
rb_ary_push(result_array, tmp);
|
169
|
+
}
|
170
|
+
return result_array;
|
171
|
+
}
|
157
172
|
|
158
|
-
|
159
|
-
|
173
|
+
static void
|
174
|
+
state_free(struct State * state)
|
160
175
|
{
|
161
|
-
|
162
|
-
|
163
|
-
|
176
|
+
free(state->mat);
|
177
|
+
free(state->res);
|
178
|
+
state->mat = NULL;
|
179
|
+
}
|
164
180
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
int
|
181
|
+
static void
|
182
|
+
state_allocate(struct State * state, struct Query * query, struct Target * target)
|
183
|
+
{
|
184
|
+
int i;
|
169
185
|
|
170
|
-
|
171
|
-
|
186
|
+
state->height = query->len;
|
187
|
+
state->width = target->n_bits;
|
188
|
+
state->n_bytes = target->n_bytes;
|
189
|
+
state->res_counter = 0;
|
172
190
|
|
173
|
-
|
174
|
-
int n_words;// number of words needed for storing 'pb' bits.
|
175
|
-
long refine_mm;// pointer for mm(match matrix) used in refinment step.
|
191
|
+
if(state->max_length < query->len * target->n_bytes){
|
176
192
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
for(i = 0 ; i < 10 ; i++)
|
182
|
-
f[i] = 0;
|
193
|
+
if(state->max_length != 0){
|
194
|
+
printf("state->free called max_length : %d\n", state->max_length);
|
195
|
+
state_free(state);
|
196
|
+
}
|
183
197
|
|
184
|
-
|
198
|
+
state->mat = (long *)talloc((query->len + 2) *// Depth
|
199
|
+
target->n_bytes * // Width
|
200
|
+
state->height * // Height
|
201
|
+
sizeof(long)); // sizeof(long)
|
202
|
+
|
203
|
+
state->res_max_len = (query->len + 2) *// Depth
|
204
|
+
target->n_bytes * // Width
|
205
|
+
state->height * // Height
|
206
|
+
sizeof(long) * 100;
|
207
|
+
state->res = (long *)talloc(state->res_max_len); // sizeof(long)
|
208
|
+
state->max_length = query->len * target->n_bytes;
|
209
|
+
}
|
210
|
+
state->length = query->len * target->n_bytes;
|
211
|
+
state->depth = -1;
|
185
212
|
|
186
|
-
|
187
|
-
|
213
|
+
for(i = 0 ; i < state->length ; i++){ state->mat[i] = 0;}
|
214
|
+
}
|
188
215
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
216
|
+
static void
|
217
|
+
state_setup(struct State * state, struct Query * query, struct Target * target)
|
218
|
+
{
|
219
|
+
int i, j;
|
220
|
+
for(i = 0 ; i < query->len ; i++){
|
221
|
+
for(j = 0 ; j < target->n_bits ; j++){
|
222
|
+
if (query->type[i] == target->typ[j]){
|
223
|
+
BITON(state->mat, i, j, target->n_bytes);
|
224
|
+
}
|
198
225
|
}
|
226
|
+
}
|
227
|
+
}
|
199
228
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
/* printf("k : %d d: %d\n", k, d); */
|
209
|
-
for(j = 0 ; j < n_words ; j++){
|
210
|
-
if(j == (k / 32)){
|
211
|
-
for(i = 0 ; i < pa ; i++){
|
212
|
-
mm[i * n_words + j] = mm[(i - pa) * n_words + j] & reverse_bit[k - (k / 32) * 32];
|
213
|
-
}
|
214
|
-
mm[d * n_words + j] = bit_mask[k - (k / 32) * 32];
|
215
|
-
}else{
|
216
|
-
for(i = 0 ; i < pa ; i++){
|
217
|
-
mm[i * n_words + j] = mm[(i - pa) * n_words + j];
|
218
|
-
}
|
219
|
-
mm[d * n_words + j] = ZERO;
|
229
|
+
static void
|
230
|
+
state_setup_block(struct State * state)
|
231
|
+
{
|
232
|
+
int i, j;
|
233
|
+
for(i = 0 ; i < state->height ; i++){
|
234
|
+
for(j = 0 ; j < state->width ; j++){
|
235
|
+
if (rb_yield_values(2, INT2FIX(i), INT2FIX(j))){
|
236
|
+
BITON(state->mat, i, j, state->n_bytes);
|
220
237
|
}
|
221
238
|
}
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
239
|
+
}
|
240
|
+
}
|
241
|
+
|
242
|
+
static void
|
243
|
+
state_push(struct State * state)
|
244
|
+
{
|
245
|
+
memmove(state->mat + state->length,
|
246
|
+
state->mat,
|
247
|
+
state->length * sizeof(long) );
|
248
|
+
state->mat += state->length;
|
249
|
+
state->depth++;
|
250
|
+
}
|
251
|
+
|
252
|
+
static void
|
253
|
+
state_pop(struct State * state)
|
254
|
+
{
|
255
|
+
state->mat -= state->length;
|
256
|
+
state->depth--;
|
257
|
+
}
|
258
|
+
|
259
|
+
inline static long
|
260
|
+
has_bit(long * mat, int height, int width, int n_bytes){
|
261
|
+
return (mat[height * n_bytes + width / ARCH] & (1 << (width % ARCH)));
|
262
|
+
}
|
263
|
+
|
264
|
+
/*
|
265
|
+
* Hot spot
|
266
|
+
*/
|
267
|
+
inline static void
|
268
|
+
refine(struct State * state, struct Query * query, struct Target * target){
|
269
|
+
int i, j, k, l, m, bit_removed;
|
270
|
+
bit_removed = 1;
|
271
|
+
while(bit_removed){
|
272
|
+
bit_removed = 0;// false
|
273
|
+
for(i = 0 ; i < query->len ; i++){
|
274
|
+
for(j = 0 ; j < target->n_bits ; j++){
|
275
|
+
if(has_bit(state->mat, i, j, target->n_bytes)){
|
276
|
+
for(k = 0 ; k < query->num[i] ; k++){
|
277
|
+
m = 0;
|
278
|
+
for(l = 0 ; l < target->n_bytes ; l++){
|
279
|
+
if((state->mat[query->ptr[i][k] * target->n_bytes + l] &
|
280
|
+
target->mat[j * target->n_bytes + l]) != 0){
|
281
|
+
m++;
|
282
|
+
}
|
246
283
|
}
|
247
|
-
if(
|
248
|
-
|
249
|
-
|
284
|
+
if(m == 0){
|
285
|
+
BITOFF(state->mat, i, j, target->n_bytes);
|
286
|
+
bit_removed = 1;
|
250
287
|
}
|
251
288
|
}
|
252
|
-
|
253
289
|
}
|
254
|
-
kk++;
|
255
290
|
}
|
256
|
-
// Idea for optimization
|
257
|
-
// every 32 bit is tested here.
|
258
|
-
kk = 0;
|
259
|
-
dd++;
|
260
291
|
}
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
292
|
+
}
|
293
|
+
}
|
294
|
+
|
295
|
+
static void
|
296
|
+
state_clear_bits(long * l, int h, int w, int n_bytes, int height){
|
297
|
+
int i;
|
298
|
+
for(i = 0 ; i < n_bytes ; i++){ l[i + h * n_bytes] = 0; }
|
299
|
+
for(i = 0 ; i < height ; i++){ BITOFF(l, i, w, n_bytes); }
|
300
|
+
BITON(l, h, w, n_bytes);
|
301
|
+
}
|
302
|
+
|
303
|
+
#define TRUE 1
|
304
|
+
#define FALSE 0
|
305
|
+
|
306
|
+
inline static int
|
307
|
+
state_is_valid(struct State * state){
|
308
|
+
int i, j, n_bytes, flag;
|
309
|
+
// n_bytes = NBYTES(state->length);
|
310
|
+
for(i = 0 ; i < state->height ; i++){
|
311
|
+
flag = 0;
|
312
|
+
for(j = 0 ; j < state->n_bytes ; j++){
|
313
|
+
if(state->mat[i * state->n_bytes + j] != 0){
|
314
|
+
flag++;
|
274
315
|
}
|
275
316
|
}
|
317
|
+
if(flag == 0)
|
318
|
+
return FALSE;
|
319
|
+
}
|
320
|
+
return TRUE;
|
321
|
+
}
|
276
322
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
while(k > pb){
|
301
|
-
if(d == 0){
|
302
|
-
return FAIL;
|
323
|
+
static void
|
324
|
+
search_by_ullmann(struct State * state, struct Query * query, struct Target * target){
|
325
|
+
int k;
|
326
|
+
// Idea for optimization
|
327
|
+
//show(state->mat, query->len, target->n_bits);
|
328
|
+
if(state->depth == state->height - 1){
|
329
|
+
//printf("FOUND!\n");
|
330
|
+
state_push_result(state);
|
331
|
+
//show(state->mat, query->len, target->n_bits);
|
332
|
+
}else{
|
333
|
+
for(k = 0 ; k < target->n_bits ; k++){
|
334
|
+
if(has_bit(state->mat,
|
335
|
+
state->depth + 1,
|
336
|
+
k,
|
337
|
+
target->n_bytes)){
|
338
|
+
state_push(state);
|
339
|
+
state_clear_bits(state->mat, state->depth, k, target->n_bytes, query->len);
|
340
|
+
//show(state->mat, query->len, target->n_bits);
|
341
|
+
refine(state, query, target);
|
342
|
+
//show(state->mat, query->len, target->n_bits);
|
343
|
+
if(state_is_valid(state) == TRUE){
|
344
|
+
//show(state->mat, query->len, target->n_bits);
|
345
|
+
search_by_ullmann(state, query, target);
|
303
346
|
}
|
304
|
-
|
305
|
-
k = f[d];
|
306
|
-
h[k / 32] &= reverse_bit[k - (k / 32) * 32];//remove bit
|
307
|
-
k++;
|
308
|
-
while(h[k / 32] & bit_mask[k - (k / 32) * 32])
|
309
|
-
k++;
|
347
|
+
state_pop(state);
|
310
348
|
}
|
311
|
-
h[k / 32] |= bit_mask[k - (k / 32) * 32];//add bit
|
312
349
|
}
|
313
350
|
}
|
314
|
-
//printf("d : %d k : %d FAIL!\n", d, k);
|
315
|
-
return FAIL;
|
316
351
|
}
|
317
352
|
|
318
|
-
static void
|
319
|
-
|
320
|
-
|
353
|
+
static void
|
354
|
+
db_load(struct CompoundDB * db, struct Query * query){
|
355
|
+
|
356
|
+
int new_n_bits;
|
357
|
+
int new_n_bytes;
|
358
|
+
int mat_ptr;
|
359
|
+
|
360
|
+
struct Target target;
|
361
|
+
struct State state;
|
362
|
+
struct Record record;
|
363
|
+
|
364
|
+
int i, j;
|
321
365
|
|
322
|
-
|
366
|
+
target.n_bits = 0;
|
367
|
+
target.n_bytes = 0;
|
368
|
+
target.max_length = 0;
|
369
|
+
state.max_length = 0;
|
323
370
|
|
324
|
-
for(
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
371
|
+
for(;;){
|
372
|
+
if(feof(db->idx) || feof(db->mat) || feof(db->mat)){
|
373
|
+
printf("Database broken!\n");
|
374
|
+
return;
|
375
|
+
}
|
376
|
+
|
377
|
+
fread(& record, sizeof(struct Record), 1, db->idx);
|
378
|
+
if(record.n_bits == -1){
|
379
|
+
return;
|
380
|
+
}
|
381
|
+
target_setup_db(& target, & record);
|
382
|
+
if(record.information != -1){
|
383
|
+
|
384
|
+
fread(target.mat, sizeof(long), target.n_bits * target.n_bytes, db->mat);
|
385
|
+
fread(target.typ, sizeof(long), target.n_bits, db->typ);
|
386
|
+
|
387
|
+
state_allocate(& state, query, & target);
|
388
|
+
state_setup(& state, query, & target);
|
389
|
+
//show(state.mat, query->len, target.n_bits);
|
390
|
+
search_by_ullmann(& state, query, & target);
|
391
|
+
}else{
|
392
|
+
fread(target.typ, sizeof(long), target.n_bytes, db->typ);
|
393
|
+
printf("atom_number : %d\n", target.typ[0]);
|
330
394
|
}
|
331
|
-
//printf("\n");
|
332
395
|
}
|
396
|
+
target_free_db(& target);
|
397
|
+
state_free(& state);
|
333
398
|
}
|
334
399
|
|
335
|
-
static
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
400
|
+
static void
|
401
|
+
query_setup(VALUE mol, struct Query * query){
|
402
|
+
VALUE atom_type_str;
|
403
|
+
VALUE adj_index;
|
404
|
+
VALUE edges;
|
340
405
|
|
341
|
-
|
342
|
-
long * mm;//[800000];
|
343
|
-
long * m;
|
406
|
+
int i, j, k;
|
344
407
|
|
345
|
-
//
|
346
|
-
|
347
|
-
|
348
|
-
VALUE mapping;
|
408
|
+
// allocating and setting atom type
|
409
|
+
atom_type_str = rb_funcall(mol, rb_intern("typ_str"), 0);
|
410
|
+
Check_Type(atom_type_str, T_STRING);
|
349
411
|
|
350
|
-
|
351
|
-
|
352
|
-
|
412
|
+
query->len = RSTRING(atom_type_str)->len / sizeof(long);
|
413
|
+
query->type = (long *)talloc(query->len * sizeof(long));
|
414
|
+
memcpy(query->type, RSTRING(atom_type_str)->ptr, sizeof(long) * query->len);
|
353
415
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
if(n_pb > n_pa){
|
358
|
-
return Qfalse;
|
359
|
-
}
|
416
|
+
// allocatting and setting index
|
417
|
+
adj_index = rb_funcall(mol, rb_intern("adjacent_index"), 0);
|
418
|
+
Check_Type(adj_index, T_ARRAY);
|
360
419
|
|
361
|
-
|
420
|
+
edges = rb_funcall(mol, rb_intern("edges"), 0);
|
421
|
+
Check_Type(edges, T_ARRAY);
|
362
422
|
|
363
|
-
|
423
|
+
query->edge_len = RARRAY(edges)->len;
|
364
424
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
memcpy(mm, (long *)RSTRING(match)->ptr, RSTRING(match)->len); // BUG!!
|
425
|
+
query->ptr = (int **) talloc(query->len * sizeof(int **) );
|
426
|
+
query->num = (int * ) talloc(query->len * sizeof(int * ) );
|
427
|
+
query->idx = (int * ) talloc(query->edge_len * sizeof(int * ) * 2 );
|
370
428
|
|
371
|
-
|
429
|
+
k = 0;
|
430
|
+
for(i = 0 ; i < query->len ; i++){
|
431
|
+
Check_Type(rb_ary_entry(adj_index, i), T_ARRAY);
|
432
|
+
query->num[i] = RARRAY(rb_ary_entry(adj_index, i))->len;
|
433
|
+
query->ptr[i] = query->idx + k;
|
434
|
+
for(j = 0 ; j < query->num[i] ; j++){
|
435
|
+
Check_Type(rb_ary_entry(rb_ary_entry(adj_index, i), j), T_FIXNUM);
|
436
|
+
query->idx[k] = FIX2INT(rb_ary_entry(rb_ary_entry(adj_index, i), j));
|
437
|
+
k++;
|
438
|
+
}
|
439
|
+
}
|
372
440
|
|
373
|
-
|
441
|
+
}
|
374
442
|
|
375
|
-
|
376
|
-
|
443
|
+
static void
|
444
|
+
query_free(struct Query * query){
|
445
|
+
free(query->type);
|
446
|
+
free(query->ptr);
|
447
|
+
free(query->num);
|
448
|
+
free(query->idx);
|
449
|
+
|
450
|
+
query->type = NULL;
|
451
|
+
query->ptr = NULL;
|
452
|
+
query->num = NULL;
|
453
|
+
query->idx = NULL;
|
454
|
+
}
|
377
455
|
|
378
|
-
|
456
|
+
static VALUE
|
457
|
+
db_search(VALUE self, VALUE database_name, VALUE q_mol, VALUE block)
|
458
|
+
{
|
459
|
+
char * filename;
|
460
|
+
struct CompoundDB db;
|
461
|
+
struct Query query;
|
379
462
|
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
for(i = 0 ; i < n_pb ; i++){
|
386
|
-
rb_ary_push(mapping, INT2FIX(ntz_m(mm + n_words * n_pb * n_pb + i * n_words, n_pa)));
|
387
|
-
}
|
388
|
-
return mapping;
|
463
|
+
filename = StringValuePtr(database_name);
|
464
|
+
|
465
|
+
if(strlen(filename) > 40){
|
466
|
+
rb_raise(rb_eException, "length of database name must less than 40!");
|
389
467
|
}
|
390
|
-
|
468
|
+
|
469
|
+
query_setup(q_mol, & query);
|
470
|
+
|
471
|
+
db.mat = db_file_open(filename, ".mat");
|
472
|
+
db.idx = db_file_open(filename, ".idx");
|
473
|
+
db.typ = db_file_open(filename, ".typ");
|
474
|
+
|
475
|
+
db_load(& db, & query);
|
476
|
+
|
477
|
+
query_free(& query);
|
478
|
+
|
479
|
+
fclose(db.mat);
|
480
|
+
fclose(db.idx);
|
481
|
+
fclose(db.typ);
|
391
482
|
}
|
392
483
|
|
393
|
-
|
484
|
+
static void
|
485
|
+
target_setup(VALUE t_mol, struct Target * target){
|
486
|
+
VALUE bit_mat;
|
487
|
+
VALUE bit_str;
|
488
|
+
VALUE atom_types;
|
394
489
|
|
395
|
-
|
396
|
-
|
397
|
-
|
490
|
+
int i;
|
491
|
+
|
492
|
+
atom_types = rb_funcall(t_mol, rb_intern("typ_str"), 0);
|
493
|
+
Check_Type(atom_types, T_STRING);
|
494
|
+
|
495
|
+
target->n_bits = RSTRING(atom_types)->len / sizeof(long);
|
496
|
+
target->typ = (long *)talloc(target->n_bits * sizeof(long));
|
497
|
+
memcpy(target->typ, RSTRING(atom_types)->ptr, target->n_bits * sizeof(long));
|
498
|
+
|
499
|
+
/*
|
500
|
+
* Set up adjacency matrix
|
501
|
+
*/
|
502
|
+
bit_mat = rb_funcall(t_mol, rb_intern("bit_mat"), 0);
|
503
|
+
bit_str = rb_funcall(bit_mat, rb_intern("bit_str"), 0);
|
504
|
+
|
505
|
+
target->n_bytes = NBYTES(target->n_bits);
|
398
506
|
|
399
|
-
|
400
|
-
|
507
|
+
target->mat = (long *)talloc(target->n_bytes * target->n_bits * sizeof(long));
|
508
|
+
memcpy(target->mat, RSTRING(bit_str)->ptr, RSTRING(bit_str)->len);
|
401
509
|
}
|
402
510
|
|
403
|
-
|
404
|
-
|
405
|
-
|
511
|
+
static void
|
512
|
+
target_free(struct Target * target){
|
513
|
+
free(target->typ);
|
514
|
+
free(target->mat);
|
515
|
+
}
|
516
|
+
|
517
|
+
static VALUE
|
518
|
+
mol_by_mol(VALUE self, VALUE q_mol, VALUE t_mol)
|
519
|
+
{
|
520
|
+
struct Query query;
|
521
|
+
struct Target target;
|
522
|
+
struct State state;
|
523
|
+
VALUE result;
|
524
|
+
|
525
|
+
target.max_length = 0;
|
526
|
+
state.max_length = 0;
|
527
|
+
|
528
|
+
query_setup( q_mol, & query );
|
529
|
+
target_setup( t_mol, & target );
|
406
530
|
|
407
|
-
|
408
|
-
rb_define_method(subcomp_cGraph, "subcomp_match_by_ullmann", subcomp_match_by_ullmann, 5);
|
531
|
+
state_allocate(& state, & query, & target);
|
409
532
|
|
410
|
-
|
533
|
+
if(rb_block_given_p() == Qtrue){
|
534
|
+
state_setup_block(& state);
|
535
|
+
}
|
536
|
+
else{
|
537
|
+
state_setup(& state, & query, & target);
|
538
|
+
}
|
539
|
+
|
540
|
+
search_by_ullmann(& state, & query, & target);
|
541
|
+
result = state_get_result(& state);
|
542
|
+
|
543
|
+
query_free(& query);
|
544
|
+
target_free(& target);
|
545
|
+
state_free(& state);
|
546
|
+
|
547
|
+
return result;
|
548
|
+
}
|
411
549
|
|
412
|
-
|
550
|
+
void Init_subcomp(){
|
551
|
+
VALUE subcomp_mChem;
|
413
552
|
|
414
|
-
|
415
|
-
rb_define_singleton_method(
|
553
|
+
subcomp_mChem = rb_define_module("Chem");
|
554
|
+
rb_define_singleton_method(subcomp_mChem, "match_by_ullmann", mol_by_mol, 2);
|
555
|
+
rb_define_singleton_method(subcomp_mChem, "db_search", db_search, 2);
|
556
|
+
//define_bitdb_method();
|
416
557
|
}
|