rejuicer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +8 -0
- data/README.rdoc +191 -0
- data/Rakefile +28 -0
- data/ext/extconf.rb +2 -0
- data/ext/rejuicer_set.c +405 -0
- data/ext/rejuicer_set.h +79 -0
- data/lib/rejuicer.rb +70 -0
- metadata +86 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
= Rejuicer
|
2
|
+
|
3
|
+
= Description
|
4
|
+
Rejuicer is an easy-to-use search engine.
|
5
|
+
|
6
|
+
= How to
|
7
|
+
|
8
|
+
== Initialize and set the index targets
|
9
|
+
index = Rejuicer.new(:odd_flag, :remainder_3, :remainder_5)
|
10
|
+
|
11
|
+
== Indexing
|
12
|
+
work = Struct.new(:id, :odd_flag, :remainder_3, :remainder_5)
|
13
|
+
mother = (0...10000).inject([]){|m, i| m << work.new(i, i % 2 == 0, i % 3, i % 5)}
|
14
|
+
|
15
|
+
index.set(mother)
|
16
|
+
|
17
|
+
== Search
|
18
|
+
index.search(:odd_flag => false) #=> [1,3,5,7,9...,9997,9999]
|
19
|
+
index.search(:remainder_3 => 2, :remainder_5 => 4) #=> [14,29,44,59,...,9974,9989]
|
20
|
+
|
21
|
+
== INSTALL:
|
22
|
+
|
23
|
+
sudo gem install rejuicer
|
24
|
+
|
25
|
+
== LICENSE:
|
26
|
+
|
27
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
28
|
+
Version 3, 29 June 2007
|
29
|
+
|
30
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
31
|
+
Everyone is permitted to copy and distribute verbatim copies
|
32
|
+
of this license document, but changing it is not allowed.
|
33
|
+
|
34
|
+
|
35
|
+
This version of the GNU Lesser General Public License incorporates
|
36
|
+
the terms and conditions of version 3 of the GNU General Public
|
37
|
+
License, supplemented by the additional permissions listed below.
|
38
|
+
|
39
|
+
0. Additional Definitions.
|
40
|
+
|
41
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
42
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
43
|
+
General Public License.
|
44
|
+
|
45
|
+
"The Library" refers to a covered work governed by this License,
|
46
|
+
other than an Application or a Combined Work as defined below.
|
47
|
+
|
48
|
+
An "Application" is any work that makes use of an interface provided
|
49
|
+
by the Library, but which is not otherwise based on the Library.
|
50
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
51
|
+
of using an interface provided by the Library.
|
52
|
+
|
53
|
+
A "Combined Work" is a work produced by combining or linking an
|
54
|
+
Application with the Library. The particular version of the Library
|
55
|
+
with which the Combined Work was made is also called the "Linked
|
56
|
+
Version".
|
57
|
+
|
58
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
59
|
+
Corresponding Source for the Combined Work, excluding any source code
|
60
|
+
for portions of the Combined Work that, considered in isolation, are
|
61
|
+
based on the Application, and not on the Linked Version.
|
62
|
+
|
63
|
+
The "Corresponding Application Code" for a Combined Work means the
|
64
|
+
object code and/or source code for the Application, including any data
|
65
|
+
and utility programs needed for reproducing the Combined Work from the
|
66
|
+
Application, but excluding the System Libraries of the Combined Work.
|
67
|
+
|
68
|
+
1. Exception to Section 3 of the GNU GPL.
|
69
|
+
|
70
|
+
You may convey a covered work under sections 3 and 4 of this License
|
71
|
+
without being bound by section 3 of the GNU GPL.
|
72
|
+
|
73
|
+
2. Conveying Modified Versions.
|
74
|
+
|
75
|
+
If you modify a copy of the Library, and, in your modifications, a
|
76
|
+
facility refers to a function or data to be supplied by an Application
|
77
|
+
that uses the facility (other than as an argument passed when the
|
78
|
+
facility is invoked), then you may convey a copy of the modified
|
79
|
+
version:
|
80
|
+
|
81
|
+
a) under this License, provided that you make a good faith effort to
|
82
|
+
ensure that, in the event an Application does not supply the
|
83
|
+
function or data, the facility still operates, and performs
|
84
|
+
whatever part of its purpose remains meaningful, or
|
85
|
+
|
86
|
+
b) under the GNU GPL, with none of the additional permissions of
|
87
|
+
this License applicable to that copy.
|
88
|
+
|
89
|
+
3. Object Code Incorporating Material from Library Header Files.
|
90
|
+
|
91
|
+
The object code form of an Application may incorporate material from
|
92
|
+
a header file that is part of the Library. You may convey such object
|
93
|
+
code under terms of your choice, provided that, if the incorporated
|
94
|
+
material is not limited to numerical parameters, data structure
|
95
|
+
layouts and accessors, or small macros, inline functions and templates
|
96
|
+
(ten or fewer lines in length), you do both of the following:
|
97
|
+
|
98
|
+
a) Give prominent notice with each copy of the object code that the
|
99
|
+
Library is used in it and that the Library and its use are
|
100
|
+
covered by this License.
|
101
|
+
|
102
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
103
|
+
document.
|
104
|
+
|
105
|
+
4. Combined Works.
|
106
|
+
|
107
|
+
You may convey a Combined Work under terms of your choice that,
|
108
|
+
taken together, effectively do not restrict modification of the
|
109
|
+
portions of the Library contained in the Combined Work and reverse
|
110
|
+
engineering for debugging such modifications, if you also do each of
|
111
|
+
the following:
|
112
|
+
|
113
|
+
a) Give prominent notice with each copy of the Combined Work that
|
114
|
+
the Library is used in it and that the Library and its use are
|
115
|
+
covered by this License.
|
116
|
+
|
117
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
118
|
+
document.
|
119
|
+
|
120
|
+
c) For a Combined Work that displays copyright notices during
|
121
|
+
execution, include the copyright notice for the Library among
|
122
|
+
these notices, as well as a reference directing the user to the
|
123
|
+
copies of the GNU GPL and this license document.
|
124
|
+
|
125
|
+
d) Do one of the following:
|
126
|
+
|
127
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
128
|
+
License, and the Corresponding Application Code in a form
|
129
|
+
suitable for, and under terms that permit, the user to
|
130
|
+
recombine or relink the Application with a modified version of
|
131
|
+
the Linked Version to produce a modified Combined Work, in the
|
132
|
+
manner specified by section 6 of the GNU GPL for conveying
|
133
|
+
Corresponding Source.
|
134
|
+
|
135
|
+
1) Use a suitable shared library mechanism for linking with the
|
136
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
137
|
+
a copy of the Library already present on the user's computer
|
138
|
+
system, and (b) will operate properly with a modified version
|
139
|
+
of the Library that is interface-compatible with the Linked
|
140
|
+
Version.
|
141
|
+
|
142
|
+
e) Provide Installation Information, but only if you would otherwise
|
143
|
+
be required to provide such information under section 6 of the
|
144
|
+
GNU GPL, and only to the extent that such information is
|
145
|
+
necessary to install and execute a modified version of the
|
146
|
+
Combined Work produced by recombining or relinking the
|
147
|
+
Application with a modified version of the Linked Version. (If
|
148
|
+
you use option 4d0, the Installation Information must accompany
|
149
|
+
the Minimal Corresponding Source and Corresponding Application
|
150
|
+
Code. If you use option 4d1, you must provide the Installation
|
151
|
+
Information in the manner specified by section 6 of the GNU GPL
|
152
|
+
for conveying Corresponding Source.)
|
153
|
+
|
154
|
+
5. Combined Libraries.
|
155
|
+
|
156
|
+
You may place library facilities that are a work based on the
|
157
|
+
Library side by side in a single library together with other library
|
158
|
+
facilities that are not Applications and are not covered by this
|
159
|
+
License, and convey such a combined library under terms of your
|
160
|
+
choice, if you do both of the following:
|
161
|
+
|
162
|
+
a) Accompany the combined library with a copy of the same work based
|
163
|
+
on the Library, uncombined with any other library facilities,
|
164
|
+
conveyed under the terms of this License.
|
165
|
+
|
166
|
+
b) Give prominent notice with the combined library that part of it
|
167
|
+
is a work based on the Library, and explaining where to find the
|
168
|
+
accompanying uncombined form of the same work.
|
169
|
+
|
170
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
171
|
+
|
172
|
+
The Free Software Foundation may publish revised and/or new versions
|
173
|
+
of the GNU Lesser General Public License from time to time. Such new
|
174
|
+
versions will be similar in spirit to the present version, but may
|
175
|
+
differ in detail to address new problems or concerns.
|
176
|
+
|
177
|
+
Each version is given a distinguishing version number. If the
|
178
|
+
Library as you received it specifies that a certain numbered version
|
179
|
+
of the GNU Lesser General Public License "or any later version"
|
180
|
+
applies to it, you have the option of following the terms and
|
181
|
+
conditions either of that published version or of any later version
|
182
|
+
published by the Free Software Foundation. If the Library as you
|
183
|
+
received it does not specify a version number of the GNU Lesser
|
184
|
+
General Public License, you may choose any version of the GNU Lesser
|
185
|
+
General Public License ever published by the Free Software Foundation.
|
186
|
+
|
187
|
+
If the Library as you received it specifies that a proxy can decide
|
188
|
+
whether future versions of the GNU Lesser General Public License shall
|
189
|
+
apply, that proxy's public statement of acceptance of any version is
|
190
|
+
permanent authorization for you to choose that version for the
|
191
|
+
Library.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
%w[rubygems rake rake/clean fileutils newgem rubigen].each { |f| require f }
|
2
|
+
require File.dirname(__FILE__) + '/lib/rejuicer'
|
3
|
+
|
4
|
+
# Generate all the Rake tasks
|
5
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
6
|
+
# Build the Hoe specification; the packaging/release Rake tasks are generated from it.
$hoe = Hoe.new('rejuicer', Rejuicer.version) do |p|
  p.developer('Tsukasa OISHI', 'tsukasa.oishi@gmail.com')
  p.changes        = p.paragraphs_of("History.txt", 0..1).join("\n\n")
  p.rubyforge_name = p.name # TODO this is default value
  p.extra_dev_deps = [
    ['newgem', ">= #{::Newgem::VERSION}"]
  ]

  # Register the C extension in the gemspec. This must be assigned on the spec
  # object; a bare local variable here would be silently discarded.
  p.spec_extras = {
    :extensions => ['ext/extconf.rb'],
  }

  p.clean_globs |= %w[**/.DS_Store tmp *.log]
  # Interpolate the names (an escaped "\#{...}" would yield the literal text).
  path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "#{p.rubyforge_name}/#{p.name}"
  p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/, ''), 'rdoc')
  p.rsync_args = '-av --delete --ignore-errors'
end
|
23
|
+
|
24
|
+
require 'newgem/tasks' # load /tasks/*.rake
|
25
|
+
Dir['tasks/**/*.rake'].each { |t| load t }
|
26
|
+
|
27
|
+
# TODO - want other tests/tasks run by default? Add them to the list
|
28
|
+
# task :default => [:spec, :features]
|
data/ext/extconf.rb
ADDED
data/ext/rejuicer_set.c
ADDED
@@ -0,0 +1,405 @@
|
|
1
|
+
//************************************
|
2
|
+
// rejuicer_set.c
|
3
|
+
//
|
4
|
+
// Tsukasa OISHI
|
5
|
+
//
|
6
|
+
// 2010/03/17
|
7
|
+
//************************************
|
8
|
+
|
9
|
+
#include <stdio.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <string.h>
|
12
|
+
#include <ruby.h>
|
13
|
+
#include "rejuicer_set.h"
|
14
|
+
|
15
|
+
//
|
16
|
+
// initialize
|
17
|
+
//
|
18
|
+
// Reset a root node to the empty state: no children and no elements.
void init_root_node(root_node root)
{
    void **slot = root->index;
    void **stop = root->index + ROOT_NODE_SIZE;

    // clear every child pointer
    while (slot < stop) {
        *slot++ = (void*)NULL;
    }

    root->children_size = 0;
    root->size = 0;
}
|
28
|
+
|
29
|
+
void *init_branch_node(int level, unsigned int value)
|
30
|
+
{
|
31
|
+
return init_branch_node2(level, (value / INDEX_PER_SIZE[level - 1]));
|
32
|
+
}
|
33
|
+
|
34
|
+
void *init_branch_node2(int level, unsigned int num)
|
35
|
+
{
|
36
|
+
int i;
|
37
|
+
branch_node branch;
|
38
|
+
|
39
|
+
if (!(branch = (branch_node)malloc(sizeof(struct _branch_node)))) {
|
40
|
+
rb_raise(rb_eStandardError, "memory is not enough");
|
41
|
+
}
|
42
|
+
|
43
|
+
for(i = 0; i < BRANCH_NODE_SIZE; i++) {
|
44
|
+
branch->index[i] = (void*)NULL;
|
45
|
+
}
|
46
|
+
|
47
|
+
branch->level = level;
|
48
|
+
branch->num = num;
|
49
|
+
branch->children_size = 0;
|
50
|
+
|
51
|
+
return (void*)branch;
|
52
|
+
}
|
53
|
+
|
54
|
+
void *init_leaf_node(unsigned int value)
|
55
|
+
{
|
56
|
+
return init_leaf_node2(value / INDEX_PER_SIZE[LAST_BRANCH_LEVEL]);
|
57
|
+
}
|
58
|
+
|
59
|
+
void *init_leaf_node2(unsigned int num)
|
60
|
+
{
|
61
|
+
leaf_node leaf;
|
62
|
+
|
63
|
+
if(!(leaf = (leaf_node)malloc(sizeof(struct _leaf_node)))) {
|
64
|
+
rb_raise(rb_eStandardError, "memory is not enough");
|
65
|
+
}
|
66
|
+
|
67
|
+
leaf->num = num;
|
68
|
+
leaf->data = 0;
|
69
|
+
|
70
|
+
return (void*)leaf;
|
71
|
+
}
|
72
|
+
|
73
|
+
//
|
74
|
+
// insert element into set
|
75
|
+
//
|
76
|
+
// Insert value into the set rooted at root.
// root->size is incremented only when value was not already present.
void add_num(root_node root, unsigned int value)
{
    unsigned int quotient  = value / INDEX_PER_SIZE[0];
    unsigned int remainder = value % INDEX_PER_SIZE[0];

    if (!(root->index[quotient])) {
        // init_branch_node() divides its argument by INDEX_PER_SIZE[0] itself,
        // so it must receive the full value (not the quotient) for the node
        // offset to equal the root slot number.
        root->index[quotient] = init_branch_node(1, value);
        root->children_size++;
    }

    if (search_and_insert((branch_node)root->index[quotient], 1, remainder, value)) {
        root->size++;
    }
}
|
92
|
+
|
93
|
+
// Walk down from branch (at the given level) to the leaf that covers the
// element, creating missing nodes on the way, and set the element's bit.
//   value    : remainder of the element relative to this branch
//   original : the full element value, used to compute offsets of new nodes
// Returns 1 when the element was newly inserted, 0 when it already existed.
int search_and_insert(branch_node branch, int level, unsigned int value, unsigned int original)
{
    unsigned int quotient  = value / INDEX_PER_SIZE[level];
    unsigned int remainder = value % INDEX_PER_SIZE[level];

    if (!(branch->index[quotient])) {
        // Allocate first: the initializers raise on allocation failure, and
        // the child counter must not be bumped for a slot that stays empty.
        if (level == LAST_BRANCH_LEVEL) {
            branch->index[quotient] = init_leaf_node(original);
        } else {
            branch->index[quotient] = init_branch_node(level + 1, original);
        }
        branch->children_size++;
    }

    if (level == LAST_BRANCH_LEVEL) {
        return search_and_insert_at_leaf((leaf_node)branch->index[quotient], remainder);
    }
    return search_and_insert((branch_node)branch->index[quotient], level + 1, remainder, original);
}
|
115
|
+
|
116
|
+
// Set bit number value in the leaf bitmap.
// Returns 1 when the bit was newly set, 0 when it was already set.
int search_and_insert_at_leaf(leaf_node leaf, unsigned int value)
{
    // Shift an unsigned one: value can be 31, and shifting a signed 1 into
    // the sign bit is undefined behaviour.
    unsigned int target_bit = 1U << value;

    if (leaf->data & target_bit) {
        return 0;
    }

    leaf->data |= target_bit;
    return 1;
}
|
130
|
+
|
131
|
+
//
|
132
|
+
// output Array object from internal set
|
133
|
+
//
|
134
|
+
// Append every element of the set to the Ruby array, in ascending order.
void to_array(root_node root, VALUE array)
{
    int i;

    // The loop is bounded by the array size only; children_size is not
    // consulted, so a stale counter can never push the index out of range.
    for (i = 0; i < ROOT_NODE_SIZE; i++) {
        if (root->index[i]) {
            search_and_get_array((branch_node)root->index[i], array);
        }
    }
}
|
145
|
+
|
146
|
+
// Append every element stored below branch to the Ruby array.
// Children of a bottom-level branch are leaves; otherwise they are branches.
void search_and_get_array(branch_node branch, VALUE array)
{
    int i;
    int at_bottom = (branch->level == LAST_BRANCH_LEVEL);

    // Bounded by the array size only, so the index always stays in range.
    for (i = 0; i < BRANCH_NODE_SIZE; i++) {
        if (!branch->index[i]) {
            continue;
        }
        if (at_bottom) {
            search_and_get_array_at_leaf((leaf_node)branch->index[i], array);
        } else {
            search_and_get_array((branch_node)branch->index[i], array);
        }
    }
}
|
166
|
+
|
167
|
+
// Push the element for every set bit of the leaf bitmap onto the Ruby array,
// lowest bit first. The element is leaf offset * leaf capacity + bit position.
void search_and_get_array_at_leaf(leaf_node leaf, VALUE array)
{
    unsigned int bits;
    int offset;

    for (bits = leaf->data, offset = 0; bits != 0; bits >>= 1, offset++) {
        if ((bits & 1) == 0) {
            continue;
        }
        rb_ary_push(array, UINT2NUM(leaf->num * INDEX_PER_SIZE[LAST_BRANCH_LEVEL] + offset));
    }
}
|
182
|
+
|
183
|
+
//
|
184
|
+
// intersection
|
185
|
+
//
|
186
|
+
// Store the intersection of set0 and set1 into ret_set.
// ret_set must be an initialized, empty root node. Its size is accumulated
// while the leaves are intersected.
void intersection(root_node ret_set, root_node set0, root_node set1)
{
    int i;
    root_node base, other;
    branch_node child;

    if (set0->size == 0 || set1->size == 0) {
        return;
    }

    // drive the walk from the set with fewer elements
    if (set0->size > set1->size) {
        base = set1;
        other = set0;
    } else {
        base = set0;
        other = set1;
    }

    // Bounded by the array size only, so the index always stays in range.
    for (i = 0; i < ROOT_NODE_SIZE; i++) {
        if (!base->index[i] || !other->index[i]) {
            continue;
        }
        child = (branch_node)base->index[i];
        ret_set->index[i] = init_branch_node2(child->level, child->num);
        // keep the child counter of the result in step with its slots
        ret_set->children_size++;
        intersection_branch_node(ret_set, (branch_node)ret_set->index[i], child, (branch_node)other->index[i]);
    }
}
|
211
|
+
|
212
|
+
// Intersect the children of base and other into ret_set (a fresh branch of
// the result tree). root is the result set whose size is being accumulated.
void intersection_branch_node(root_node root, branch_node ret_set, branch_node base, branch_node other)
{
    int i;

    // Bounded by the array size only, so the index always stays in range.
    for (i = 0; i < BRANCH_NODE_SIZE; i++) {
        if (!base->index[i] || !other->index[i]) {
            continue;
        }

        if (base->level == LAST_BRANCH_LEVEL) {
            // children are leaves
            ret_set->index[i] = init_leaf_node2(((leaf_node)base->index[i])->num);
            ret_set->children_size++;
            intersection_leaf_node(root, (leaf_node)ret_set->index[i], (leaf_node)base->index[i], (leaf_node)other->index[i]);
        } else {
            // children are branches
            ret_set->index[i] = init_branch_node2(((branch_node)base->index[i])->level, ((branch_node)base->index[i])->num);
            ret_set->children_size++;
            intersection_branch_node(root, (branch_node)ret_set->index[i], (branch_node)base->index[i], (branch_node)other->index[i]);
        }
    }
}
|
231
|
+
|
232
|
+
// Intersect two leaf bitmaps into ret_set and add the number of common
// elements to the size of the result set (root).
void intersection_leaf_node(root_node root, leaf_node ret_set, leaf_node base, leaf_node other)
{
    unsigned int common = base->data & other->data;

    ret_set->data = common;
    root->size += bit_count(common);
}
|
237
|
+
|
238
|
+
// Count the bits set in a 32-bit word.
unsigned int bit_count(unsigned int x)
{
    unsigned int pairs, nibbles, bytes;

    pairs   = x - ((x >> 1) & 0x55555555U);                          // 2-bit sums
    nibbles = (pairs & 0x33333333U) + ((pairs >> 2) & 0x33333333U);  // 4-bit sums
    bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0fU;              // 8-bit sums
    bytes  += bytes >> 8;
    bytes  += bytes >> 16;

    // the total (at most 32) is in the low six bits
    return bytes & 0x3FU;
}
|
247
|
+
|
248
|
+
//
|
249
|
+
// memory free
|
250
|
+
//
|
251
|
+
// Release the whole tree, including the root node itself.
// Registered as the free function of the Ruby object wrapping the set.
void destroy_all(root_node root)
{
    int i;

    // Bounded by the array size only, so the index always stays in range.
    for (i = 0; i < ROOT_NODE_SIZE; i++) {
        if (root->index[i]) {
            destroy_branch((branch_node)root->index[i]);
        }
    }

    // The root struct is allocated by Data_Make_Struct (Ruby's allocator),
    // so it is returned through xfree rather than plain free.
    xfree(root);
}
|
263
|
+
|
264
|
+
// Release branch and everything below it.
// Children of a bottom-level branch are leaves; otherwise they are branches.
void destroy_branch(branch_node branch)
{
    int i;
    int at_bottom = (branch->level == LAST_BRANCH_LEVEL);

    // Bounded by the array size only, so the index always stays in range.
    for (i = 0; i < BRANCH_NODE_SIZE; i++) {
        if (!branch->index[i]) {
            continue;
        }
        if (at_bottom) {
            free((leaf_node)branch->index[i]);
        } else {
            destroy_branch((branch_node)branch->index[i]);
        }
    }

    free(branch);
}
|
285
|
+
|
286
|
+
//-----------------------------------------------------------
|
287
|
+
// Ruby Methods
|
288
|
+
// ----------------------------------------------------------
|
289
|
+
|
290
|
+
/**
|
291
|
+
* new
|
292
|
+
**/
|
293
|
+
// RejuicerSet.new            -> empty set
// RejuicerSet.new(array)     -> set holding the non-negative integers of array
//
// The given array is only read; it is not modified. nil elements are skipped.
// Raises TypeError when the argument is not an Array.
static VALUE t_new(int argc, VALUE *argv, VALUE klass)
{
    VALUE obj;
    VALUE num;
    root_node root;
    long i;

    obj = Data_Make_Struct(klass, struct _root_node, NULL, destroy_all, root);
    init_root_node(root);

    if (argc == 1 && !NIL_P(argv[0])) {
        Check_Type(argv[0], T_ARRAY);

        // The length is re-read on every pass because converting an element
        // may run Ruby code that changes the array.
        for (i = 0; i < RARRAY_LEN(argv[0]); i++) {
            num = rb_ary_entry(argv[0], i);
            if (!NIL_P(num)) {
                add_num(root, NUM2UINT(num));
            }
        }
    }

    return obj;
}
|
310
|
+
|
311
|
+
/**
|
312
|
+
* add
|
313
|
+
**/
|
314
|
+
// set.add(value) / set << value
// Adds value to the set and returns self. nil is ignored.
static VALUE t_add(VALUE self, VALUE value)
{
    root_node root;

    Data_Get_Struct(self, struct _root_node, root);

    // Test the Ruby object for nil before converting it. Comparing the
    // converted C integer with Qnil would drop the number whose value
    // happens to equal Qnil's bit pattern.
    if (!NIL_P(value)) {
        add_num(root, NUM2UINT(value));
    }

    return self;
}
|
326
|
+
|
327
|
+
/**
|
328
|
+
* intersection
|
329
|
+
**/
|
330
|
+
// set.intersection(other) / set & other
// Returns a new RejuicerSet holding the elements common to both sets.
// Raises TypeError when other is not a RejuicerSet.
static VALUE t_intersection(VALUE self, VALUE array)
{
    root_node set0, set1, ret_set;
    VALUE ret;

    // Data_Get_Struct does not verify what it unwraps, so reject anything
    // that is not a RejuicerSet before touching its data pointer.
    if (!RTEST(rb_obj_is_kind_of(array, rb_cRejuicerSet))) {
        rb_raise(rb_eTypeError, "RejuicerSet is required");
    }

    ret = Data_Make_Struct(rb_cRejuicerSet, struct _root_node, NULL, destroy_all, ret_set);
    init_root_node(ret_set);

    Data_Get_Struct(self, struct _root_node, set0);
    Data_Get_Struct(array, struct _root_node, set1);

    intersection(ret_set, set0, set1);

    return ret;
}
|
345
|
+
|
346
|
+
/**
|
347
|
+
* to_a
|
348
|
+
**/
|
349
|
+
// set.to_a
// Returns a new Array holding the elements of the set in ascending order.
static VALUE t_to_a(VALUE self)
{
    root_node root;
    VALUE array;

    Data_Get_Struct(self, struct _root_node, root);

    // pre-size the array with the known element count
    array = rb_ary_new2(root->size);
    to_array(root, array);

    return array;
}
|
362
|
+
|
363
|
+
/**
|
364
|
+
* size
|
365
|
+
**/
|
366
|
+
// set.size / set.length
// Returns the number of elements in the set.
static VALUE t_size(VALUE self)
{
    root_node root;

    Data_Get_Struct(self, struct _root_node, root);

    // size is an unsigned counter; convert it as unsigned so large sets are
    // not reported as negative
    return UINT2NUM(root->size);
}
|
374
|
+
|
375
|
+
/**
|
376
|
+
* empty?
|
377
|
+
**/
|
378
|
+
// set.empty?
// Returns true when the set holds no elements.
static VALUE t_empty(VALUE self)
{
    root_node root;

    Data_Get_Struct(self, struct _root_node, root);

    return (root->size != 0) ? Qfalse : Qtrue;
}
|
390
|
+
|
391
|
+
/**
|
392
|
+
* define class
|
393
|
+
**/
|
394
|
+
void Init_rejuicer_set(void) {
|
395
|
+
rb_cRejuicerSet = rb_define_class("RejuicerSet", rb_cObject);
|
396
|
+
rb_define_singleton_method(rb_cRejuicerSet, "new", t_new, -1);
|
397
|
+
rb_define_method(rb_cRejuicerSet, "add", t_add, 1);
|
398
|
+
rb_define_method(rb_cRejuicerSet, "intersection", t_intersection, 1);
|
399
|
+
rb_define_method(rb_cRejuicerSet, "to_a", t_to_a, 0);
|
400
|
+
rb_define_method(rb_cRejuicerSet, "size", t_size, 0);
|
401
|
+
rb_define_method(rb_cRejuicerSet, "empty?", t_empty, 0);
|
402
|
+
rb_define_alias(rb_cRejuicerSet, "<<", "add");
|
403
|
+
rb_define_alias(rb_cRejuicerSet, "&", "intersection");
|
404
|
+
rb_define_alias(rb_cRejuicerSet, "length", "size");
|
405
|
+
}
|
data/ext/rejuicer_set.h
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
//************************************
|
2
|
+
// rejuicer_set.h
|
3
|
+
//
|
4
|
+
// Tsukasa OISHI
|
5
|
+
//
|
6
|
+
// 2010/03/17
|
7
|
+
//************************************
|
8
|
+
|
9
|
+
// max children size at node
|
10
|
+
#define ROOT_NODE_SIZE 8
|
11
|
+
#define BRANCH_NODE_SIZE 256
|
12
|
+
// tree bottom level
|
13
|
+
#define LAST_BRANCH_LEVEL 3
|
14
|
+
|
15
|
+
// RejuicerSet class object
|
16
|
+
static VALUE rb_cRejuicerSet;
|
17
|
+
|
18
|
+
// size of a child
|
19
|
+
static unsigned int INDEX_PER_SIZE[] = {0x20000000, 0x200000, 0x2000, 0x20};
|
20
|
+
|
21
|
+
// leaf node
|
22
|
+
typedef struct _leaf_node {
|
23
|
+
unsigned int num; // offset
|
24
|
+
unsigned int data; // bit operation
|
25
|
+
} *leaf_node;
|
26
|
+
|
27
|
+
// branch node
|
28
|
+
typedef struct _branch_node {
|
29
|
+
unsigned int num; // offset
|
30
|
+
unsigned int level;
|
31
|
+
unsigned int children_size;
|
32
|
+
void *index[BRANCH_NODE_SIZE]; // children pointer
|
33
|
+
} *branch_node;
|
34
|
+
|
35
|
+
typedef struct _root_node {
|
36
|
+
unsigned int size; // number of elements in set
|
37
|
+
unsigned int children_size;
|
38
|
+
void *index[ROOT_NODE_SIZE]; // children pointer
|
39
|
+
} *root_node;
|
40
|
+
|
41
|
+
// initialize
|
42
|
+
void init_root_node(root_node);
|
43
|
+
void *init_branch_node(int, unsigned int);
|
44
|
+
void *init_branch_node2(int, unsigned int);
|
45
|
+
void *init_leaf_node(unsigned int);
|
46
|
+
void *init_leaf_node2(unsigned int);
|
47
|
+
|
48
|
+
// insert element into set
|
49
|
+
void add_num(root_node, unsigned int);
|
50
|
+
int search_and_insert(branch_node, int, unsigned int, unsigned int);
|
51
|
+
int search_and_insert_at_leaf(leaf_node, unsigned int);
|
52
|
+
|
53
|
+
// output Array object from internal set
|
54
|
+
void to_array(root_node, VALUE);
|
55
|
+
void search_and_get_array(branch_node, VALUE);
|
56
|
+
void search_and_get_array_at_leaf(leaf_node, VALUE);
|
57
|
+
|
58
|
+
// intersection
|
59
|
+
void intersection(root_node, root_node, root_node);
|
60
|
+
void intersection_branch_node(root_node, branch_node, branch_node, branch_node);
|
61
|
+
void intersection_leaf_node(root_node, leaf_node, leaf_node, leaf_node);
|
62
|
+
unsigned int bit_count(unsigned int);
|
63
|
+
|
64
|
+
// memory free
|
65
|
+
void destroy_all(root_node);
|
66
|
+
void destroy_branch(branch_node);
|
67
|
+
|
68
|
+
//-----------------------------------------------------------
|
69
|
+
// Ruby Methods
|
70
|
+
// ----------------------------------------------------------
|
71
|
+
|
72
|
+
static VALUE t_new(int, VALUE *, VALUE);
|
73
|
+
static VALUE t_add(VALUE, VALUE);
|
74
|
+
static VALUE t_intersection(VALUE, VALUE);
|
75
|
+
static VALUE t_to_a(VALUE);
|
76
|
+
static VALUE t_size(VALUE);
|
77
|
+
static VALUE t_empty(VALUE);
|
78
|
+
void Init_rejuicer_set(void);
|
79
|
+
|
data/lib/rejuicer.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'rejuicer_set'
|
2
|
+
#
|
3
|
+
# Rejuicer is an easy-to-use search engine.
|
4
|
+
# RejuicerSet is used for the set operations.
|
5
|
+
#
|
6
|
+
# work = Struct.new(:id, :odd_flag, :remainder_3, :remainder_5)
|
7
|
+
# mother = (0...10000).inject([]){|m, i| m << work.new(i, i % 2 == 0, i % 3, i % 5)}
|
8
|
+
#
|
9
|
+
# index = Rejuicer.new(:odd_flag, :remainder_3, :remainder_5)
|
10
|
+
# index.set(mother)
|
11
|
+
#
|
12
|
+
# index.search(:odd_flag => false) #=> [1,3,5,7,9...,9997,9999]
|
13
|
+
# index.search(:remainder_3 => 2, :remainder_5 => 4) #=> [14,29,44,59,...,9974,9989]
|
14
|
+
#
|
15
|
+
class Rejuicer
  # Version of the gem.
  VERSION = '0.0.1'

  # Returns the version string of the gem.
  def self.version
    VERSION
  end

  #
  # args :
  #   names of the methods or attributes to index
  #
  def initialize(*args)
    # attribute name => { attribute value => RejuicerSet of object ids }
    @tree = args.inject({}) { |tree, name| tree[name.to_sym] = {}; tree }
  end

  #
  # indexing (many objects at once)
  #   array   : objects
  #   id_attr : name of the method that returns the id of an object
  #
  def set(array, id_attr = :id)
    array.each { |obj| add(obj, id_attr) }
  end

  #
  # indexing (one object)
  #   obj     : object
  #   id_attr : name of the method that returns the id of the object
  #
  # An indexed attribute the object does not respond to is skipped.
  # Returns self, so calls can be chained with <<.
  #
  def add(obj, id_attr = :id)
    @tree.each do |attr, sets|
      begin
        value  = obj.__send__(attr)
        obj_id = obj.__send__(id_attr)
      rescue NoMethodError
        next
      end

      (sets[value] ||= RejuicerSet.new) << obj_id
    end
    self
  end
  alias :<< :add

  #
  # search
  #   conditions : { attribute name => value }; all of them must match
  #
  # Returns the ids of the matching objects. The conditions are not modified.
  # Raises ArgumentError when conditions is empty or names an attribute
  # that is not indexed.
  #
  def search(conditions)
    raise ArgumentError, 'conditions must not be empty' if conditions.empty?

    sets = conditions.map do |attr, value|
      by_value = @tree[attr.to_sym]
      raise ArgumentError, "#{attr} is not indexed" unless by_value
      by_value[value]
    end

    # a value that was never indexed cannot match anything
    return [] if sets.any? { |set| set.nil? }

    # intersect starting from the smallest sets
    sets.sort_by { |set| set.size }.inject { |work, set| work & set }.to_a
  end
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rejuicer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tsukasa OISHI
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-04-05 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: newgem
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.3
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.8.0
|
34
|
+
version:
|
35
|
+
description: Rejuicer.
|
36
|
+
email:
|
37
|
+
- tsukasa.oishi@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions:
|
41
|
+
- ext/extconf.rb
|
42
|
+
extra_rdoc_files:
|
43
|
+
- History.txt
|
44
|
+
- Manifest.txt
|
45
|
+
- README.rdoc
|
46
|
+
files:
|
47
|
+
- History.txt
|
48
|
+
- Manifest.txt
|
49
|
+
- README.rdoc
|
50
|
+
- Rakefile
|
51
|
+
- lib/rejuicer.rb
|
52
|
+
- ext/extconf.rb
|
53
|
+
- ext/rejuicer_set.h
|
54
|
+
- ext/rejuicer_set.c
|
55
|
+
has_rdoc: true
|
56
|
+
homepage: http://www.kaeruspoon.net/
|
57
|
+
licenses: []
|
58
|
+
|
59
|
+
post_install_message:
|
60
|
+
rdoc_options:
|
61
|
+
- --main
|
62
|
+
- README.rdoc
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
- ext
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project: rejuicer
|
81
|
+
rubygems_version: 1.3.5
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: Rejuicer.
|
85
|
+
test_files: []
|
86
|
+
|