ruby_odeum 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +504 -0
- data/LICENSE +504 -0
- data/README +50 -0
- data/bin/odeum_mgr +106 -0
- data/doc/rdoc/classes/Odeum.html +235 -0
- data/doc/rdoc/classes/Odeum.src/M000010.html +25 -0
- data/doc/rdoc/classes/Odeum.src/M000011.html +22 -0
- data/doc/rdoc/classes/Odeum.src/M000012.html +27 -0
- data/doc/rdoc/classes/Odeum.src/M000013.html +27 -0
- data/doc/rdoc/classes/Odeum.src/M000014.html +28 -0
- data/doc/rdoc/classes/Odeum/Document.html +382 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000040.html +25 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000041.html +22 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000042.html +23 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000043.html +23 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000044.html +24 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000045.html +32 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000046.html +22 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000047.html +22 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000048.html +22 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000049.html +22 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000050.html +24 -0
- data/doc/rdoc/classes/Odeum/Document.src/M000051.html +27 -0
- data/doc/rdoc/classes/Odeum/Index.html +662 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000015.html +46 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000016.html +33 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000017.html +35 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000018.html +23 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000019.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000020.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000021.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000022.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000023.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000024.html +29 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000025.html +23 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000026.html +24 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000027.html +23 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000028.html +26 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000029.html +24 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000030.html +20 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000031.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000032.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000033.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000034.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000035.html +20 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000036.html +20 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000037.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000038.html +22 -0
- data/doc/rdoc/classes/Odeum/Index.src/M000039.html +22 -0
- data/doc/rdoc/classes/OdeumTest.html +257 -0
- data/doc/rdoc/classes/OdeumTest.src/M000001.html +18 -0
- data/doc/rdoc/classes/OdeumTest.src/M000002.html +19 -0
- data/doc/rdoc/classes/OdeumTest.src/M000003.html +27 -0
- data/doc/rdoc/classes/OdeumTest.src/M000004.html +25 -0
- data/doc/rdoc/classes/OdeumTest.src/M000005.html +44 -0
- data/doc/rdoc/classes/OdeumTest.src/M000006.html +20 -0
- data/doc/rdoc/classes/OdeumTest.src/M000007.html +39 -0
- data/doc/rdoc/classes/OdeumTest.src/M000008.html +59 -0
- data/doc/rdoc/classes/OdeumTest.src/M000009.html +41 -0
- data/doc/rdoc/created.rid +1 -0
- data/doc/rdoc/files/COPYING.html +756 -0
- data/doc/rdoc/files/LICENSE.html +756 -0
- data/doc/rdoc/files/README.html +175 -0
- data/doc/rdoc/files/ext/odeum_index/odeum_index_c.html +101 -0
- data/doc/rdoc/files/test/test_odeum_rb.html +109 -0
- data/doc/rdoc/fr_class_index.html +30 -0
- data/doc/rdoc/fr_file_index.html +31 -0
- data/doc/rdoc/fr_method_index.html +77 -0
- data/doc/rdoc/index.html +24 -0
- data/doc/rdoc/rdoc-style.css +208 -0
- data/ext/odeum_index/cabin.c +2735 -0
- data/ext/odeum_index/cabin.h +1040 -0
- data/ext/odeum_index/curia.c +1114 -0
- data/ext/odeum_index/curia.h +430 -0
- data/ext/odeum_index/depot.c +1910 -0
- data/ext/odeum_index/depot.h +439 -0
- data/ext/odeum_index/extconf.rb +10 -0
- data/ext/odeum_index/myconf.c +668 -0
- data/ext/odeum_index/myconf.h +523 -0
- data/ext/odeum_index/odeum.c +1743 -0
- data/ext/odeum_index/odeum.h +541 -0
- data/ext/odeum_index/odeum_index.c +991 -0
- data/ext/odeum_index/villa.c +1923 -0
- data/ext/odeum_index/villa.h +470 -0
- data/ext/odeum_index/vista.c +159 -0
- data/ext/odeum_index/vista.h +111 -0
- data/test/test_odeum.rb +174 -0
- metadata +138 -0
@@ -0,0 +1,991 @@
|
|
1
|
+
/*************************************************************************************************
|
2
|
+
 * Implementation of Odeum for Ruby
|
3
|
+
* Copyright (C) 2000-2005 Mikio Hirabayashi
|
4
|
+
* This file is part of QDBM, Quick Database Manager.
|
5
|
+
* QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU
|
6
|
+
* Lesser General Public License as published by the Free Software Foundation; either version
|
7
|
+
* 2.1 of the License or any later version. QDBM is distributed in the hope that it will be
|
8
|
+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
9
|
+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
10
|
+
* details.
|
11
|
+
* You should have received a copy of the GNU Lesser General Public License along with QDBM; if
|
12
|
+
* not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
13
|
+
* 02111-1307 USA.
|
14
|
+
*************************************************************************************************/
|
15
|
+
|
16
|
+
|
17
|
+
#include "ruby.h"
|
18
|
+
#include <odeum.h>
|
19
|
+
|
20
|
+
#define FALSE 0
|
21
|
+
|
22
|
+
static VALUE mOdeum;
|
23
|
+
static VALUE cIndex;
|
24
|
+
static VALUE cDocument;
|
25
|
+
static VALUE cPair;
|
26
|
+
|
27
|
+
|
28
|
+
/* A union used to do nothing more than wrap the ODEUM and ODDOC structures
|
29
|
+
* so that we can get at them in an alloc method before they need to be initialized.
|
30
|
+
*/
|
31
|
+
|
32
|
+
typedef union OdeumWrapper {
|
33
|
+
ODEUM *odeum;
|
34
|
+
ODDOC *oddoc;
|
35
|
+
} OdeumWrapper;
|
36
|
+
|
37
|
+
/** Convenience macros to get at the different types we store in T_DATA stuff.
 *
 * RAISE_NOT_NULL(T)           -- raises StandardError when T is NULL.
 * DATA_GET_WRAPPER(from,name) -- declares OdeumWrapper *name, loads it from the
 *                                T_DATA object `from`, and raises if it is NULL.
 * DATA_GET_ODEUM(from,name)   -- declares ODEUM *name taken from the wrapper of
 *                                `from` (the wrapper is named name##wrapper) and
 *                                raises if the ODEUM pointer is NULL.
 * DATA_GET_ODDOC(from,name)   -- same as DATA_GET_ODEUM, but for the ODDOC pointer.
 * REQUIRE_TYPE(V, T)          -- raises TypeError unless TYPE(V) equals T.
 *
 * These macros expand to declarations and/or several statements, so they must
 * only be used as stand-alone statements inside a braced block, never as the
 * unbraced body of an if/else or loop.
 */
#define RAISE_NOT_NULL(T) if((T) == NULL) rb_raise(rb_eStandardError, "NULL found for " # T " when shouldn't be.");
/* DATA_GET_WRAPPER already checks the wrapper, so the ODEUM/ODDOC variants only check the inner pointer. */
#define DATA_GET_WRAPPER(from,name) OdeumWrapper *name = NULL; Data_Get_Struct((from), OdeumWrapper, name); RAISE_NOT_NULL(name);
#define DATA_GET_ODEUM(from,name) DATA_GET_WRAPPER(from,name##wrapper); ODEUM *name = name##wrapper->odeum; RAISE_NOT_NULL(name);
#define DATA_GET_ODDOC(from,name) DATA_GET_WRAPPER(from,name##wrapper); ODDOC *name = name##wrapper->oddoc; RAISE_NOT_NULL(name);

#define REQUIRE_TYPE(V, T) if(TYPE(V) != (T)) rb_raise(rb_eTypeError, "Wrong argument type for " # V " required " # T);
|
44
|
+
|
45
|
+
|
46
|
+
/* Converts from a CBLIST of char * strings to a Ruby Array of Strings. */
|
47
|
+
VALUE CBLIST_2_array(const CBLIST *list)
|
48
|
+
{
|
49
|
+
int count = cblistnum(list);
|
50
|
+
int i = 0;
|
51
|
+
VALUE ary = rb_ary_new();
|
52
|
+
|
53
|
+
for(i = 0; i < count; i++) {
|
54
|
+
int sp = 0;
|
55
|
+
const char *val = cblistval(list, i, &sp);
|
56
|
+
rb_ary_push(ary, rb_str_new(val, sp));
|
57
|
+
}
|
58
|
+
|
59
|
+
return ary;
|
60
|
+
}
|
61
|
+
|
62
|
+
/** Converts an array of strings to a CBLIST. */
|
63
|
+
CBLIST *array_2_CBLIST(VALUE ary)
|
64
|
+
{
|
65
|
+
long i = 0;
|
66
|
+
CBLIST *result = cblistopen();
|
67
|
+
VALUE str;
|
68
|
+
|
69
|
+
for(i = 0; (str = rb_ary_entry(ary, i)) != Qnil; i++) {
|
70
|
+
cblistpush(result, RSTRING(str)->ptr, RSTRING(str)->len);
|
71
|
+
}
|
72
|
+
|
73
|
+
return result;
|
74
|
+
}
|
75
|
+
|
76
|
+
/** Converts a CBMAP of char * strings into a Ruby Hash of Strings. */
|
77
|
+
VALUE CBMAP_2_hash(CBMAP *map)
|
78
|
+
{
|
79
|
+
int key_size = 0;
|
80
|
+
int val_size = 0;
|
81
|
+
const char *map_key = NULL;
|
82
|
+
const char *map_val = NULL;
|
83
|
+
VALUE hash = rb_hash_new();
|
84
|
+
VALUE key;
|
85
|
+
VALUE val;
|
86
|
+
|
87
|
+
cbmapiterinit(map);
|
88
|
+
while((map_key = cbmapiternext(map, &key_size)) != NULL) {
|
89
|
+
map_val = cbmapget(map, map_key, key_size, &val_size);
|
90
|
+
|
91
|
+
key = rb_str_new(map_key, key_size);
|
92
|
+
val = rb_str_new(map_val, val_size);
|
93
|
+
|
94
|
+
rb_hash_aset(hash, key, val);
|
95
|
+
}
|
96
|
+
|
97
|
+
return hash;
|
98
|
+
}
|
99
|
+
|
100
|
+
/** Builds a new document from a created ODDOC. This is needed since the Index_* functions
|
101
|
+
* will return an ODDOC pointer, but the only function to construct a Document normally is
|
102
|
+
* with oddocopen which requires a URI. This solves the problem by using the "naked"
|
103
|
+
* Document_initialize, and then attaches the doc to it.
|
104
|
+
*/
|
105
|
+
VALUE Document_from_ODDOC(ODDOC *doc)
|
106
|
+
{
|
107
|
+
VALUE uri[1];
|
108
|
+
VALUE new_doc;
|
109
|
+
|
110
|
+
uri[0] = Qnil;
|
111
|
+
new_doc = rb_class_new_instance(1, uri, cDocument);
|
112
|
+
|
113
|
+
DATA_GET_WRAPPER(new_doc, wrapper);
|
114
|
+
wrapper->oddoc = doc;
|
115
|
+
|
116
|
+
return new_doc;
|
117
|
+
}
|
118
|
+
|
119
|
+
|
120
|
+
/**
|
121
|
+
* Frees the ODEUM struct contained in the wrapper, closing it if it is not already
|
122
|
+
* NULL. This is the best we can do for automatically closing the ODEUM if it wasn't
|
123
|
+
* done explicitly with Odeum.close.
|
124
|
+
*/
|
125
|
+
void Index_free(void *ptr)
|
126
|
+
{
|
127
|
+
OdeumWrapper *wrapper = (OdeumWrapper *)ptr;
|
128
|
+
if(wrapper->odeum) {
|
129
|
+
// didn't explicitly close, so do it for them
|
130
|
+
odclose(wrapper->odeum);
|
131
|
+
wrapper->odeum = NULL;
|
132
|
+
}
|
133
|
+
// now safe to free it
|
134
|
+
free(wrapper);
|
135
|
+
}
|
136
|
+
|
137
|
+
|
138
|
+
/** Allocates only the OdeumWrapper we use to wrap the pointers we need for internal
|
139
|
+
* operation. This is needed because we can't create the ODEUM struct until we know
|
140
|
+
* the name of the catalog, so we have to "defer" until Odeum.initialize.
|
141
|
+
*/
|
142
|
+
VALUE Index_alloc(VALUE klass)
|
143
|
+
{
|
144
|
+
OdeumWrapper *wrapper = NULL;
|
145
|
+
VALUE obj = Data_Make_Struct(klass, OdeumWrapper, NULL, Index_free, wrapper);
|
146
|
+
return obj;
|
147
|
+
}
|
148
|
+
|
149
|
+
|
150
|
+
/**
|
151
|
+
* call-seq:
|
152
|
+
* Index.new(name, mode) -> Index
|
153
|
+
*
|
154
|
+
* Creates an Index with the given name according to mode. The name will be used
|
155
|
+
* as the basis for a local directory which will contain the database for the documents.
|
156
|
+
*
|
157
|
+
* Possible modes might be:
|
158
|
+
*
|
159
|
+
* - Odeum::OWRITER -- Opens as a writer.
|
160
|
+
* - Odeum::OREADER -- Read-only.
|
161
|
+
* - Odeum::OCREAT -- Or'd in to OWRITER to indicate that you want it created if not existing.
|
162
|
+
* - Odeum::ONOLOCK -- Opens without locking on the directory.
|
163
|
+
*
|
164
|
+
* Opening as OWRITER creates an exclusive lock on the database dir, but OREADER
|
165
|
+
* opens with a shared lock. A thread will block until the lock is achieved, but
|
166
|
+
* none of this has been tested in Ruby with Ruby's in-process threads.
|
167
|
+
*/
|
168
|
+
VALUE Index_initialize(VALUE self, VALUE name, VALUE mode)
|
169
|
+
{
|
170
|
+
DATA_GET_WRAPPER(self, wrapper);
|
171
|
+
REQUIRE_TYPE(name, T_STRING);
|
172
|
+
REQUIRE_TYPE(mode, T_FIXNUM);
|
173
|
+
|
174
|
+
wrapper->odeum = odopen(RSTRING(name)->ptr, FIX2INT(mode));
|
175
|
+
if(wrapper->odeum == NULL) {
|
176
|
+
// there was an error, find out what it was
|
177
|
+
rb_raise(rb_eStandardError, "Failed to open requested database.");
|
178
|
+
}
|
179
|
+
|
180
|
+
return self;
|
181
|
+
}
|
182
|
+
|
183
|
+
/**
|
184
|
+
* call-seq:
|
185
|
+
* Index.close -> true/false
|
186
|
+
*
|
187
|
+
* Closes the Index explicitly. It will be closed by the GC when Index_free
|
188
|
+
* is finally called, but don't rely on this as it is not reliable enough.
|
189
|
+
* Don't use the index after this, it will throw an exception or possibly crash.
|
190
|
+
*/
|
191
|
+
VALUE Index_close(VALUE self)
|
192
|
+
{
|
193
|
+
int result = 0;
|
194
|
+
DATA_GET_WRAPPER(self, wrapper);
|
195
|
+
|
196
|
+
// must set the wrapper->odeum to NULL so that Index_free does not try to close it again
|
197
|
+
result = odclose(wrapper->odeum);
|
198
|
+
wrapper->odeum = NULL;
|
199
|
+
|
200
|
+
return result == FALSE ? Qfalse : Qtrue;
|
201
|
+
}
|
202
|
+
|
203
|
+
|
204
|
+
/**
|
205
|
+
* call-seq:
|
206
|
+
* index.put(doc, wmax, over) -> true/false
|
207
|
+
*
|
208
|
+
* Puts the Document doc into the Index, and indexes a maximum of wmax
|
209
|
+
* words in the document. If over is true than the document is overwritten
|
210
|
+
* in the database. Otherwise, if the document already exists in the
|
211
|
+
* database and over== nil/false then the method will return false as
|
212
|
+
* an error.
|
213
|
+
*/
|
214
|
+
VALUE Index_put(VALUE self, VALUE doc, VALUE wmax, VALUE over)
|
215
|
+
{
|
216
|
+
int res = 0;
|
217
|
+
DATA_GET_ODEUM(self, odeum);
|
218
|
+
DATA_GET_ODDOC(doc, oddoc);
|
219
|
+
|
220
|
+
REQUIRE_TYPE(wmax, T_FIXNUM);
|
221
|
+
|
222
|
+
res = odput(odeum, oddoc, FIX2INT(wmax), !(over == Qnil || over == Qfalse));
|
223
|
+
return res == FALSE ? Qfalse : Qtrue;
|
224
|
+
}
|
225
|
+
|
226
|
+
|
227
|
+
/**
|
228
|
+
* call-seq:
|
229
|
+
* index.delete(uri) -> true/false
|
230
|
+
*
|
231
|
+
* Deletes the document given by the uri. The Index must be opened
|
232
|
+
* as a writer, and the call will return false if no such document exists.
|
233
|
+
*/
|
234
|
+
VALUE Index_delete(VALUE self, VALUE uri) {
|
235
|
+
DATA_GET_ODEUM(self, odeum);
|
236
|
+
REQUIRE_TYPE(uri, T_STRING);
|
237
|
+
|
238
|
+
int res = odout(odeum, RSTRING(uri)->ptr);
|
239
|
+
return res == FALSE ? Qfalse : Qtrue;
|
240
|
+
}
|
241
|
+
|
242
|
+
|
243
|
+
/**
|
244
|
+
* call-seq:
|
245
|
+
* index.delete_by_id(id) -> true/false
|
246
|
+
*
|
247
|
+
* Deletes a document based on its id.
|
248
|
+
*/
|
249
|
+
VALUE Index_delete_by_id(VALUE self, VALUE id) {
|
250
|
+
DATA_GET_ODEUM(self, odeum);
|
251
|
+
REQUIRE_TYPE(id, T_FIXNUM);
|
252
|
+
|
253
|
+
int res = odoutbyid(odeum, FIX2INT(id));
|
254
|
+
return res == FALSE ? Qfalse : Qtrue;
|
255
|
+
}
|
256
|
+
|
257
|
+
|
258
|
+
/**
|
259
|
+
* call-seq:
|
260
|
+
* index.get(uri) -> Document
|
261
|
+
*
|
262
|
+
* Gets a Document based on the uri, or returns nil.
|
263
|
+
*/
|
264
|
+
VALUE Index_get(VALUE self, VALUE uri) {
|
265
|
+
DATA_GET_ODEUM(self, odeum);
|
266
|
+
REQUIRE_TYPE(uri, T_STRING);
|
267
|
+
|
268
|
+
ODDOC *oddoc = odget(odeum, RSTRING(uri)->ptr);
|
269
|
+
if(oddoc == NULL)
|
270
|
+
return Qnil;
|
271
|
+
else
|
272
|
+
return Document_from_ODDOC(oddoc);
|
273
|
+
}
|
274
|
+
|
275
|
+
|
276
|
+
/**
|
277
|
+
* call-seq:
|
278
|
+
* index.get_by_id(id) -> Document
|
279
|
+
*
|
280
|
+
* Gets a Document based on its id, or nil if that document isn't there.
|
281
|
+
*/
|
282
|
+
VALUE Index_get_by_id(VALUE self, VALUE id) {
|
283
|
+
DATA_GET_ODEUM(self, odeum);
|
284
|
+
REQUIRE_TYPE(id, T_FIXNUM);
|
285
|
+
|
286
|
+
ODDOC *oddoc = odgetbyid(odeum, FIX2INT(id));
|
287
|
+
|
288
|
+
if(oddoc == NULL)
|
289
|
+
return Qnil;
|
290
|
+
else
|
291
|
+
return Document_from_ODDOC(oddoc);
|
292
|
+
}
|
293
|
+
|
294
|
+
|
295
|
+
/**
|
296
|
+
* call-seq:
|
297
|
+
* index.get_id_by_uri(id)
|
298
|
+
*
|
299
|
+
* Returns just the id of the document with the given uri.
|
300
|
+
*/
|
301
|
+
VALUE Index_get_id_by_uri(VALUE self, VALUE uri) {
|
302
|
+
DATA_GET_ODEUM(self, odeum);
|
303
|
+
REQUIRE_TYPE(uri, T_STRING);
|
304
|
+
|
305
|
+
int res = odgetidbyuri(odeum, RSTRING(uri)->ptr);
|
306
|
+
return INT2FIX(res);
|
307
|
+
}
|
308
|
+
|
309
|
+
|
310
|
+
/**
|
311
|
+
* call-seq:
|
312
|
+
* index.check(id)
|
313
|
+
*
|
314
|
+
* Checks if a document with the given id is in the database.
|
315
|
+
*/
|
316
|
+
VALUE Index_check(VALUE self, VALUE id) {
|
317
|
+
DATA_GET_ODEUM(self, odeum);
|
318
|
+
REQUIRE_TYPE(id, T_FIXNUM);
|
319
|
+
|
320
|
+
int res = odcheck(odeum, FIX2INT(id));
|
321
|
+
return res == FALSE ? Qfalse : Qtrue;
|
322
|
+
}
|
323
|
+
|
324
|
+
|
325
|
+
/**
|
326
|
+
* call-seq:
|
327
|
+
* index.search(word, max) -> [[id,score], ... ]
|
328
|
+
*
|
329
|
+
* The big payoff method which actually searches for the documents
|
330
|
+
* that have the given word mentioned. The result of the search is
|
331
|
+
* an array of "tuples" with [id, score] and sorted in score order
|
332
|
+
* from descending order of their scores. If there are no results
|
333
|
+
* then it returns an empty array.
|
334
|
+
*
|
335
|
+
* If the search attempt fails for some reason then an exception is thrown,
|
336
|
+
* but an empty result is NOT a failure (that returns an empty hash).
|
337
|
+
*/
|
338
|
+
VALUE Index_search(VALUE self, VALUE word, VALUE max) {
|
339
|
+
DATA_GET_ODEUM(self, odeum);
|
340
|
+
REQUIRE_TYPE(word, T_STRING);
|
341
|
+
REQUIRE_TYPE(max, T_FIXNUM);
|
342
|
+
|
343
|
+
int num_returned = 0;
|
344
|
+
int i = 0;
|
345
|
+
VALUE results;
|
346
|
+
|
347
|
+
ODPAIR *pairs = odsearch(odeum, RSTRING(word)->ptr, FIX2INT(max), &num_returned);
|
348
|
+
if(pairs == NULL) {
|
349
|
+
// nothing found
|
350
|
+
rb_raise(rb_eStandardError, "Search failure.");
|
351
|
+
}
|
352
|
+
|
353
|
+
results = rb_ary_new();
|
354
|
+
|
355
|
+
// convert the array of ODPAIR structs into an array of result tuples
|
356
|
+
for(i = 0; i < num_returned; i++) {
|
357
|
+
VALUE ary = rb_ary_new();
|
358
|
+
rb_ary_push(ary, INT2FIX(pairs[i].id));
|
359
|
+
rb_ary_push(ary, INT2FIX(pairs[i].score));
|
360
|
+
rb_ary_push(results, ary);
|
361
|
+
}
|
362
|
+
|
363
|
+
free(pairs);
|
364
|
+
return results;
|
365
|
+
}
|
366
|
+
|
367
|
+
/**
|
368
|
+
* call-seq:
|
369
|
+
* index.search_doc_count(word) -> Fixnum
|
370
|
+
*
|
371
|
+
* Returns the number of document matching the given word. If the word
|
372
|
+
* does not match anything then it returns -1.
|
373
|
+
*/
|
374
|
+
VALUE Index_search_doc_count(VALUE self, VALUE word) {
|
375
|
+
DATA_GET_ODEUM(self, odeum);
|
376
|
+
REQUIRE_TYPE(word, T_STRING);
|
377
|
+
|
378
|
+
int res = odsearchdnum(odeum, RSTRING(word)->ptr);
|
379
|
+
return INT2FIX(res);
|
380
|
+
}
|
381
|
+
|
382
|
+
|
383
|
+
/**
|
384
|
+
* call-seq:
|
385
|
+
* index.iterator -> true/false
|
386
|
+
*
|
387
|
+
* Begins an iterator loop to process documents in the system.
|
388
|
+
* An iterator/next pattern is used due to the difficulty of getting
|
389
|
+
* memory collection correct inside an each/block design.
|
390
|
+
*/
|
391
|
+
VALUE Index_iterator(VALUE self) {
|
392
|
+
DATA_GET_ODEUM(self, odeum);
|
393
|
+
int res = oditerinit(odeum);
|
394
|
+
|
395
|
+
return res == FALSE ? Qfalse : Qtrue;
|
396
|
+
}
|
397
|
+
|
398
|
+
/**
|
399
|
+
* call-seq:
|
400
|
+
* index.next -> Document
|
401
|
+
*
|
402
|
+
* Returns the next document or nil if there was an error. Must call
|
403
|
+
* Index.iterator first.
|
404
|
+
*/
|
405
|
+
VALUE Index_next(VALUE self) {
|
406
|
+
DATA_GET_ODEUM(self, odeum);
|
407
|
+
|
408
|
+
ODDOC *doc = oditernext(odeum);
|
409
|
+
if(doc == NULL)
|
410
|
+
return Qnil;
|
411
|
+
|
412
|
+
VALUE doc_obj = Document_from_ODDOC(doc);
|
413
|
+
|
414
|
+
return doc_obj;
|
415
|
+
}
|
416
|
+
|
417
|
+
/**
|
418
|
+
* call-seq:
|
419
|
+
* index.sync -> true/false
|
420
|
+
*
|
421
|
+
* Synchronizes any changes you have made with the database. If you
|
422
|
+
* don't do this every once in a while then the memory load will get
|
423
|
+
* to great. I found that every 1000 documents or so is a good trade-off.
|
424
|
+
*
|
425
|
+
* Returns true if everything worked, or false otherwise.
|
426
|
+
*/
|
427
|
+
VALUE Index_sync(VALUE self) {
|
428
|
+
DATA_GET_ODEUM(self, odeum);
|
429
|
+
int res = odsync(odeum);
|
430
|
+
return res == FALSE ? Qfalse : Qtrue;
|
431
|
+
}
|
432
|
+
|
433
|
+
|
434
|
+
/**
|
435
|
+
* call-seq:
|
436
|
+
* index.optimize -> true/false
|
437
|
+
*
|
438
|
+
* Purges deleted documents from the index. I found that if you
|
439
|
+
* call this while you are updating documents then it stops adding
|
440
|
+
* documents after the optimize call.
|
441
|
+
*/
|
442
|
+
VALUE Index_optimize(VALUE self) {
|
443
|
+
DATA_GET_ODEUM(self, odeum);
|
444
|
+
int res = odoptimize(odeum);
|
445
|
+
return res == FALSE ? Qfalse : Qtrue;
|
446
|
+
}
|
447
|
+
|
448
|
+
|
449
|
+
/**
|
450
|
+
* call-seq:
|
451
|
+
* index.name -> String
|
452
|
+
*/
|
453
|
+
VALUE Index_name(VALUE self) {
|
454
|
+
DATA_GET_ODEUM(self, odeum);
|
455
|
+
char *name = odname(odeum);
|
456
|
+
VALUE result = rb_str_new2(name);
|
457
|
+
free(name);
|
458
|
+
return result;
|
459
|
+
}
|
460
|
+
|
461
|
+
/**
|
462
|
+
* call-seq:
|
463
|
+
* index.size -> Fixnum
|
464
|
+
*
|
465
|
+
* Returns the size of the database files or -1 if there's a failure.
|
466
|
+
*/
|
467
|
+
VALUE Index_size(VALUE self) {
|
468
|
+
DATA_GET_ODEUM(self, odeum);
|
469
|
+
double res = odfsiz(odeum);
|
470
|
+
return rb_float_new(res);
|
471
|
+
}
|
472
|
+
|
473
|
+
|
474
|
+
/**
|
475
|
+
* call-seq:
|
476
|
+
* index.bucket_count -> Fixnum
|
477
|
+
*
|
478
|
+
* Returns the total number of elements of the bucket arrays, or -1 on failure.
|
479
|
+
*/
|
480
|
+
VALUE Index_bucket_count(VALUE self) {
|
481
|
+
DATA_GET_ODEUM(self, odeum);
|
482
|
+
int res = odbnum(odeum);
|
483
|
+
return INT2FIX(res);
|
484
|
+
}
|
485
|
+
|
486
|
+
/**
|
487
|
+
* call-seq:
|
488
|
+
* index.buckets_used -> Fixnum
|
489
|
+
*
|
490
|
+
* The total number of used elements of the bucket arrays, or -1 if failure.
|
491
|
+
*/
|
492
|
+
VALUE Index_buckets_used(VALUE self) {
|
493
|
+
DATA_GET_ODEUM(self, odeum);
|
494
|
+
int res = odbusenum(odeum);
|
495
|
+
return INT2FIX(res);
|
496
|
+
}
|
497
|
+
|
498
|
+
|
499
|
+
/**
|
500
|
+
* call-seq:
|
501
|
+
* index.doc_count -> Fixnum
|
502
|
+
*
|
503
|
+
* Number of documents stored in the database, or -1 on failure.
|
504
|
+
*/
|
505
|
+
VALUE Index_doc_count(VALUE self) {
|
506
|
+
DATA_GET_ODEUM(self, odeum);
|
507
|
+
int res = oddnum(odeum);
|
508
|
+
return INT2FIX(res);
|
509
|
+
}
|
510
|
+
|
511
|
+
|
512
|
+
/**
|
513
|
+
* call-seq:
|
514
|
+
* index.word_count -> Fixnum
|
515
|
+
*/
|
516
|
+
VALUE Index_wnum(VALUE self) {
|
517
|
+
DATA_GET_ODEUM(self, odeum);
|
518
|
+
int res = odwnum(odeum);
|
519
|
+
return INT2FIX(res);
|
520
|
+
}
|
521
|
+
|
522
|
+
|
523
|
+
/**
|
524
|
+
* call-seq:
|
525
|
+
* index.writable -> true/false
|
526
|
+
*/
|
527
|
+
VALUE Index_writable(VALUE self) {
|
528
|
+
DATA_GET_ODEUM(self, odeum);
|
529
|
+
int res = odwritable(odeum);
|
530
|
+
return res == FALSE ? Qfalse : Qtrue;
|
531
|
+
}
|
532
|
+
|
533
|
+
|
534
|
+
/**
|
535
|
+
* call-seq:
|
536
|
+
* index.fatal_error -> Fixnum
|
537
|
+
*
|
538
|
+
* Returns true if there's a fatal error or false otherwise.
|
539
|
+
*/
|
540
|
+
VALUE Index_fatal_error(VALUE self) {
|
541
|
+
DATA_GET_ODEUM(self, odeum);
|
542
|
+
int err = odfatalerror(odeum);
|
543
|
+
return err == FALSE ? Qfalse : Qtrue;
|
544
|
+
}
|
545
|
+
|
546
|
+
|
547
|
+
/**
|
548
|
+
* call-seq:
|
549
|
+
* index.inode -> Fixnum
|
550
|
+
*
|
551
|
+
* The inode number of the database directory.
|
552
|
+
*/
|
553
|
+
VALUE Index_inode(VALUE self) {
|
554
|
+
DATA_GET_ODEUM(self, odeum);
|
555
|
+
int inode = odinode(odeum);
|
556
|
+
return INT2FIX(inode);
|
557
|
+
}
|
558
|
+
|
559
|
+
|
560
|
+
/**
|
561
|
+
* call-seq:
|
562
|
+
* index.mtime -> Fixnum
|
563
|
+
*
|
564
|
+
* The mtime of the database directory.
|
565
|
+
*/
|
566
|
+
VALUE Index_mtime(VALUE self) {
|
567
|
+
DATA_GET_ODEUM(self, odeum);
|
568
|
+
int mtime = odmtime(odeum);
|
569
|
+
return INT2FIX(mtime);
|
570
|
+
}
|
571
|
+
|
572
|
+
|
573
|
+
/**
|
574
|
+
* call-seq:
|
575
|
+
* Odeum::merge(new_name, other_databases) -> true/false
|
576
|
+
*
|
577
|
+
* Merges the databases listed in other_databases (Array of Strings)
|
578
|
+
* into the new database new_name.
|
579
|
+
* If two or more documents have the same URI then the first one is
|
580
|
+
* adopted and the others are ignored.
|
581
|
+
*/
|
582
|
+
VALUE Odeum_merge(VALUE self, VALUE name, VALUE elemnames) {
|
583
|
+
REQUIRE_TYPE(name, T_STRING);
|
584
|
+
REQUIRE_TYPE(elemnames, T_ARRAY);
|
585
|
+
|
586
|
+
CBLIST *elems = array_2_CBLIST(elemnames);
|
587
|
+
int res = odmerge(RSTRING(name)->ptr, elems);
|
588
|
+
cblistclose(elems);
|
589
|
+
return res == FALSE ? Qfalse : Qtrue;
|
590
|
+
}
|
591
|
+
|
592
|
+
|
593
|
+
/**
|
594
|
+
* call-seq:
|
595
|
+
* Odeum::remove(name) -> true/false
|
596
|
+
*
|
597
|
+
* Removes the database directory and everything in it.
|
598
|
+
*/
|
599
|
+
VALUE Odeum_remove(VALUE self, VALUE name) {
|
600
|
+
REQUIRE_TYPE(name, T_STRING);
|
601
|
+
|
602
|
+
int res = odremove(RSTRING(name)->ptr);
|
603
|
+
return res == FALSE ? Qfalse : Qtrue;
|
604
|
+
}
|
605
|
+
|
606
|
+
|
607
|
+
/**
|
608
|
+
* call-seq:
|
609
|
+
* Odeum::breaktext(test) -> [word1, word2, word3]
|
610
|
+
*
|
611
|
+
* Breaks a string into an array of words that are separated by
|
612
|
+
* space characters and such delimiters as period, commaa, etc.
|
613
|
+
* You should also check out StringScanner as a more flexible
|
614
|
+
* alternative. This function must do a lot of data copying and
|
615
|
+
* other things in order to convert from Odeum internal types to Ruby
|
616
|
+
* types.
|
617
|
+
*/
|
618
|
+
VALUE Odeum_breaktext(VALUE self, VALUE text) {
|
619
|
+
REQUIRE_TYPE(text, T_STRING);
|
620
|
+
|
621
|
+
CBLIST *result = odbreaktext(RSTRING(text)->ptr);
|
622
|
+
VALUE list = CBLIST_2_array(result);
|
623
|
+
cblistclose(result);
|
624
|
+
return list;
|
625
|
+
}
|
626
|
+
|
627
|
+
|
628
|
+
/**
|
629
|
+
* call-seq:
|
630
|
+
* Odeum::normalizeword(asis) -> normal
|
631
|
+
*
|
632
|
+
* Given a word from breaktext (which is considered "as-is")
|
633
|
+
* it will "normalize" it in a consistent way which is suitable
|
634
|
+
* for searching. The normalization effectively strips puntuation
|
635
|
+
* and spacing, and then lowercases the word. If there is nothing
|
636
|
+
* but "removed" chars in the asis string then the return is empty.
|
637
|
+
* Check for this so you don't try to search for nothing.
|
638
|
+
*/
|
639
|
+
VALUE Odeum_normalizeword(VALUE self, VALUE asis) {
|
640
|
+
REQUIRE_TYPE(asis, T_STRING);
|
641
|
+
|
642
|
+
char *result = odnormalizeword(RSTRING(asis)->ptr);
|
643
|
+
VALUE res_str = rb_str_new2(result);
|
644
|
+
free(result);
|
645
|
+
return res_str;
|
646
|
+
}
|
647
|
+
|
648
|
+
|
649
|
+
/**
|
650
|
+
* call-seq:
|
651
|
+
* Odeum::settuning(ibnum, idnum, cbnum, csiz) -> nil
|
652
|
+
*
|
653
|
+
* ibnum=32749: Number of buckets for inverted indexes.
|
654
|
+
* idnum=7: Division number of inverted index.
|
655
|
+
* cbnum=262139: Number of buckets for dirty buffers.
|
656
|
+
* csiz=8388608: Maximum bytes to use memory for dirty buffers.
|
657
|
+
*
|
658
|
+
* This is set globally for all Indexes. Not sure what would happen
|
659
|
+
* if you changed this mid-stream, so don't. Make sure everything is closed.
|
660
|
+
*/
|
661
|
+
VALUE Odeum_settuning(VALUE self, VALUE ibnum, VALUE idnum, VALUE cbnum, VALUE csiz) {
|
662
|
+
REQUIRE_TYPE(ibnum, T_FIXNUM);
|
663
|
+
REQUIRE_TYPE(idnum, T_FIXNUM);
|
664
|
+
REQUIRE_TYPE(cbnum, T_FIXNUM);
|
665
|
+
REQUIRE_TYPE(csiz, T_FIXNUM);
|
666
|
+
|
667
|
+
odsettuning(FIX2INT(ibnum), FIX2INT(idnum), FIX2INT(cbnum), FIX2INT(csiz));
|
668
|
+
return Qnil;
|
669
|
+
}
|
670
|
+
|
671
|
+
|
672
|
+
|
673
|
+
/**
|
674
|
+
* Frees the internal wrapper and properly cleans up the ODDOC.
|
675
|
+
*/
|
676
|
+
void Document_free(void *ptr) {
|
677
|
+
OdeumWrapper *wrapper = (OdeumWrapper *)ptr;
|
678
|
+
|
679
|
+
if(wrapper->oddoc) {
|
680
|
+
// didn't explicitly close, so do it for them
|
681
|
+
oddocclose(wrapper->oddoc);
|
682
|
+
wrapper->oddoc = NULL;
|
683
|
+
}
|
684
|
+
// now safe to free it
|
685
|
+
free(wrapper);
|
686
|
+
}
|
687
|
+
|
688
|
+
|
689
|
+
/**
|
690
|
+
* Allocates the wrapper only, leaving the actual allocation for Document_initialize.
|
691
|
+
*/
|
692
|
+
VALUE Document_alloc(VALUE klass) {
|
693
|
+
OdeumWrapper *wrapper = NULL;
|
694
|
+
VALUE obj = Data_Make_Struct(klass, OdeumWrapper, NULL, Document_free, wrapper);
|
695
|
+
return obj;
|
696
|
+
}
|
697
|
+
|
698
|
+
/**
|
699
|
+
* call-seq:
|
700
|
+
* Document.new uri -> Document
|
701
|
+
*
|
702
|
+
* The uri should be specified if you're calling this. Internally the
|
703
|
+
* Ruby/Odeum library kind of "cheats" and passes a Qnil for the uri
|
704
|
+
* so that the ODDOC can be assigned externally. You should not
|
705
|
+
* (and probably cannot) do this from Ruby.
|
706
|
+
*/
|
707
|
+
VALUE Document_initialize(VALUE self, VALUE uri) {
|
708
|
+
DATA_GET_WRAPPER(self, wrapper);
|
709
|
+
|
710
|
+
if(NIL_P(uri)) {
|
711
|
+
wrapper->oddoc = NULL;
|
712
|
+
} else {
|
713
|
+
REQUIRE_TYPE(uri, T_STRING);
|
714
|
+
wrapper->oddoc = oddocopen(RSTRING(uri)->ptr);
|
715
|
+
}
|
716
|
+
|
717
|
+
return self;
|
718
|
+
}
|
719
|
+
|
720
|
+
|
721
|
+
/**
|
722
|
+
* call-seq:
|
723
|
+
* doc.close -> nil
|
724
|
+
*
|
725
|
+
* Explicitly closes a document. Because of what I can only decide is a bug
|
726
|
+
* in how an each iterator works, you must explicitly close a document
|
727
|
+
* if you are not storing it and you are in an each. There are probably
|
728
|
+
* subtle things about Ruby memory management I'm missing, but my tests
|
729
|
+
* show that all Document objects created with Index.get do not get
|
730
|
+
* garbage collected until they exit a block.
|
731
|
+
*/
|
732
|
+
VALUE Document_close(VALUE self) {
|
733
|
+
DATA_GET_WRAPPER(self, wrapper);
|
734
|
+
oddocclose(wrapper->oddoc);
|
735
|
+
wrapper->oddoc = NULL; // must set to null to prevent double free
|
736
|
+
return Qnil;
|
737
|
+
}
|
738
|
+
|
739
|
+
|
740
|
+
/**
|
741
|
+
* call-seq:
|
742
|
+
* doc[attr] = value
|
743
|
+
*
|
744
|
+
* Adds meta-data to the document. They should be Strings only.
|
745
|
+
*/
|
746
|
+
VALUE Document_addattr(VALUE self, VALUE name, VALUE value) {
|
747
|
+
DATA_GET_ODDOC(self, doc);
|
748
|
+
REQUIRE_TYPE(name, T_STRING);
|
749
|
+
REQUIRE_TYPE(value, T_STRING);
|
750
|
+
|
751
|
+
oddocaddattr(doc, RSTRING(name)->ptr, RSTRING(value)->ptr);
|
752
|
+
return self;
|
753
|
+
}
|
754
|
+
|
755
|
+
|
756
|
+
/**
|
757
|
+
* call-seq:
|
758
|
+
* document.add_content(content) -> document
|
759
|
+
*
|
760
|
+
* Takes the contents, breaks the words up, and then puts them in the document
|
761
|
+
* in normalized form. This is the common pattern that people use a Document
|
762
|
+
* with. You may also use Document.addword to add one word a time, and
|
763
|
+
* Document.add_word_list to add a list of words.
|
764
|
+
*
|
765
|
+
* It uses the default Odeum::breaktext method to break up the text,
|
766
|
+
* so don't use this if you need specialized stuffs.
|
767
|
+
*
|
768
|
+
* If the process of normalizing a word creates an empty word, then it
|
769
|
+
* is not added to the document's words. This usually happens for
|
770
|
+
* punctation that isn't usualy searched for anyway.
|
771
|
+
*/
|
772
|
+
VALUE Document_add_content(VALUE self, VALUE content) {
    DATA_GET_ODDOC(self, doc);
    REQUIRE_TYPE(content, T_STRING);

    // Split the text into its "as-is" words.
    CBLIST *broken = odbreaktext(RSTRING(content)->ptr);
    const int total = cblistnum(broken);
    int idx;

    for(idx = 0; idx < total; idx++) {
        int size = 0;
        const char *appearance = cblistval(broken, idx, &size);
        char *normalized = odnormalizeword(appearance);

        // Words that normalize to the empty string are skipped.
        if(normalized[0] != '\0') {
            oddocaddword(doc, normalized, appearance);
        }

        // The normalized copy is freed on every iteration.
        free(normalized);
    }

    cblistclose(broken);

    return self;
}
|
797
|
+
|
798
|
+
/**
|
799
|
+
* call-seq:
|
800
|
+
* document.add_word_list(asis) -> document
|
801
|
+
*
|
802
|
+
* Takes an array of "as-is" words, normalizes them, and puts them in the document.
|
803
|
+
* It assumes that the array is composed of asis words and normalizes them
|
804
|
+
* before putting them in the document.
|
805
|
+
*/
|
806
|
+
VALUE Document_add_word_list(VALUE self, VALUE asis) {
|
807
|
+
DATA_GET_ODDOC(self, doc);
|
808
|
+
REQUIRE_TYPE(asis, T_ARRAY);
|
809
|
+
VALUE str;
|
810
|
+
int i = 0;
|
811
|
+
|
812
|
+
for(i = 0; (str = rb_ary_entry(asis, i)) != Qnil; i++) {
|
813
|
+
char *result = odnormalizeword(RSTRING(str)->ptr);
|
814
|
+
oddocaddword(doc, result, RSTRING(str)->ptr);
|
815
|
+
free(result);
|
816
|
+
}
|
817
|
+
|
818
|
+
return self;
|
819
|
+
}
|
820
|
+
|
821
|
+
|
822
|
+
/**
|
823
|
+
* call-seq:
|
824
|
+
* document.addword(normal, asis)
|
825
|
+
*
|
826
|
+
* The basic call to add a normal and asis version of a word to the
|
827
|
+
* document for indexing.
|
828
|
+
*/
|
829
|
+
VALUE Document_addword(VALUE self, VALUE normal, VALUE asis) {
    DATA_GET_ODDOC(self, doc);
    REQUIRE_TYPE(normal, T_STRING);
    REQUIRE_TYPE(asis, T_STRING);

    // Hand the normalized form and the appearance form straight to Odeum;
    // no normalization is performed here.
    const char *normal_form = RSTRING(normal)->ptr;
    const char *asis_form = RSTRING(asis)->ptr;
    oddocaddword(doc, normal_form, asis_form);

    return self;
}
|
837
|
+
|
838
|
+
|
839
|
+
/**
|
840
|
+
* call-seq:
|
841
|
+
* document.id -> Fixnum
|
842
|
+
*
|
843
|
+
* Gives you the id that the Odeum::Index uses for the document.
|
844
|
+
*/
|
845
|
+
VALUE Document_id(VALUE self) {
    DATA_GET_ODDOC(self, doc);

    // Convert the C int id reported by Odeum into a Ruby Fixnum.
    return INT2FIX(oddocid(doc));
}
|
850
|
+
|
851
|
+
|
852
|
+
/**
|
853
|
+
* call-seq:
|
854
|
+
* document.uri -> String
|
855
|
+
*
|
856
|
+
* Gets the uri that this document represents.
|
857
|
+
*/
|
858
|
+
VALUE Document_uri(VALUE self) {
    DATA_GET_ODDOC(self, doc);

    // Build a new Ruby String from the C string returned by oddocuri.
    return rb_str_new2(oddocuri(doc));
}
|
863
|
+
|
864
|
+
|
865
|
+
/**
|
866
|
+
* call-seq:
|
867
|
+
* document[name] -> String
|
868
|
+
*
|
869
|
+
* Gets the meta-data attribute for the given name. The name must
|
870
|
+
* be a String.
|
871
|
+
*/
|
872
|
+
VALUE Document_getattr(VALUE self, VALUE name) {
|
873
|
+
DATA_GET_ODDOC(self, doc);
|
874
|
+
REQUIRE_TYPE(name, T_STRING);
|
875
|
+
|
876
|
+
VALUE attr = rb_str_new2(oddocgetattr(doc, RSTRING(name)->ptr));
|
877
|
+
return attr;
|
878
|
+
}
|
879
|
+
|
880
|
+
|
881
|
+
/**
|
882
|
+
* call-seq:
|
883
|
+
* document.normal_words -> [word1, word2, ... ]
|
884
|
+
*
|
885
|
+
* Returns the list of "normal" words in this document.
|
886
|
+
*/
|
887
|
+
VALUE Document_normal_words(VALUE self) {
    DATA_GET_ODDOC(self, doc);

    // Convert the document's normalized-word list into a Ruby Array.
    return CBLIST_2_array(oddocnwords(doc));
}
|
892
|
+
|
893
|
+
|
894
|
+
/**
|
895
|
+
* call-seq:
|
896
|
+
* document.asis_words -> [word1, word2, ...]
|
897
|
+
*
|
898
|
+
* Returns all of the asis or "appearance form" words in the document.
|
899
|
+
*/
|
900
|
+
VALUE Document_asis_words(VALUE self) {
    DATA_GET_ODDOC(self, doc);

    // Convert the document's appearance-form word list into a Ruby Array.
    return CBLIST_2_array(oddocawords(doc));
}
|
905
|
+
|
906
|
+
|
907
|
+
/**
|
908
|
+
* call-seq:
|
909
|
+
* document.scores(max, index) -> { word => score, word => score, ...}
|
910
|
+
*
|
911
|
+
* Get the normalized words and their scores in the document. The
|
912
|
+
* strange thing is that the scores are returned as Strings, but they
|
913
|
+
* are decimal strings.
|
914
|
+
*/
|
915
|
+
VALUE Document_scores(VALUE self, VALUE max, VALUE odeum_obj) {
    DATA_GET_ODDOC(self, doc);
    DATA_GET_ODEUM(odeum_obj, odeum);
    REQUIRE_TYPE(max, T_FIXNUM);

    const int limit = FIX2INT(max);
    CBMAP *word_scores = oddocscores(doc, limit, odeum);

    // Convert to a Ruby Hash first, then release the C map.
    VALUE result = CBMAP_2_hash(word_scores);
    cbmapclose(word_scores);

    return result;
}
|
925
|
+
|
926
|
+
|
927
|
+
|
928
|
+
Init_odeum_index(){
|
929
|
+
// Module Odeum
|
930
|
+
mOdeum = rb_define_module("Odeum");
|
931
|
+
|
932
|
+
// module Odeum functions
|
933
|
+
rb_define_singleton_method(mOdeum, "merge", Odeum_merge, 2);
|
934
|
+
rb_define_singleton_method(mOdeum, "remove", Odeum_remove, 1);
|
935
|
+
rb_define_singleton_method(mOdeum, "breaktext", Odeum_breaktext, 1);
|
936
|
+
rb_define_singleton_method(mOdeum, "normalizeword", Odeum_normalizeword, 1);
|
937
|
+
rb_define_singleton_method(mOdeum, "settuning", Odeum_settuning, 4);
|
938
|
+
|
939
|
+
rb_define_const(mOdeum, "OREADER", INT2FIX(OD_OREADER));
|
940
|
+
rb_define_const(mOdeum, "OWRITER", INT2FIX(OD_OWRITER));
|
941
|
+
rb_define_const(mOdeum, "OCREAT", INT2FIX(OD_OCREAT));
|
942
|
+
rb_define_const(mOdeum, "OTRUNC", INT2FIX(OD_OTRUNC));
|
943
|
+
rb_define_const(mOdeum, "ONOLCK", INT2FIX(OD_ONOLCK));
|
944
|
+
|
945
|
+
cIndex = rb_define_class_under(mOdeum, "Index", rb_cObject);
|
946
|
+
|
947
|
+
rb_define_alloc_func(cIndex, Index_alloc);
|
948
|
+
rb_define_method(cIndex, "initialize",Index_initialize, 2);
|
949
|
+
rb_define_method(cIndex, "close",Index_close, 0);
|
950
|
+
rb_define_method(cIndex, "put",Index_put, 3);
|
951
|
+
rb_define_method(cIndex, "delete",Index_delete, 1);
|
952
|
+
rb_define_method(cIndex, "delete_by_id",Index_delete_by_id, 1);
|
953
|
+
rb_define_method(cIndex, "get",Index_get, 1);
|
954
|
+
rb_define_method(cIndex, "get_by_id",Index_get_by_id, 1);
|
955
|
+
rb_define_method(cIndex, "get_id_by_uri",Index_get_id_by_uri, 1);
|
956
|
+
rb_define_method(cIndex, "check",Index_check, 1);
|
957
|
+
rb_define_method(cIndex, "search",Index_search, 2);
|
958
|
+
rb_define_method(cIndex, "search_doc_count",Index_search_doc_count, 1);
|
959
|
+
rb_define_method(cIndex, "iterator",Index_iterator, 0);
|
960
|
+
rb_define_method(cIndex, "next",Index_next, 0);
|
961
|
+
rb_define_method(cIndex, "sync",Index_sync, 0);
|
962
|
+
rb_define_method(cIndex, "optimize",Index_optimize, 0);
|
963
|
+
rb_define_method(cIndex, "name",Index_name, 0);
|
964
|
+
rb_define_method(cIndex, "size",Index_size, 0);
|
965
|
+
rb_define_method(cIndex, "bucket_count",Index_bucket_count, 0);
|
966
|
+
rb_define_method(cIndex, "buckets_used",Index_buckets_used, 0);
|
967
|
+
rb_define_method(cIndex, "doc_count",Index_doc_count, 0);
|
968
|
+
rb_define_method(cIndex, "word_count",Index_wnum, 0);
|
969
|
+
rb_define_method(cIndex, "writable",Index_writable, 0);
|
970
|
+
rb_define_method(cIndex, "fatal_error",Index_fatal_error, 0);
|
971
|
+
rb_define_method(cIndex, "inode",Index_inode, 0);
|
972
|
+
rb_define_method(cIndex, "mtime",Index_mtime, 0);
|
973
|
+
|
974
|
+
// define class Document
|
975
|
+
cDocument = rb_define_class_under(mOdeum, "Document", rb_cObject);
|
976
|
+
rb_define_alloc_func(cDocument, Document_alloc);
|
977
|
+
|
978
|
+
rb_define_method(cDocument, "initialize",Document_initialize, 1);
|
979
|
+
rb_define_method(cDocument, "[]=",Document_addattr, 2);
|
980
|
+
rb_define_method(cDocument, "[]",Document_getattr, 1);
|
981
|
+
|
982
|
+
rb_define_method(cDocument, "addword",Document_addword, 2);
|
983
|
+
rb_define_method(cDocument, "add_word_list",Document_add_word_list, 1);
|
984
|
+
rb_define_method(cDocument, "add_content",Document_add_content, 1);
|
985
|
+
rb_define_method(cDocument, "id",Document_id, 0);
|
986
|
+
rb_define_method(cDocument, "uri",Document_uri, 0);
|
987
|
+
rb_define_method(cDocument, "normal_words",Document_normal_words, 0);
|
988
|
+
rb_define_method(cDocument, "asis_words",Document_asis_words, 0);
|
989
|
+
rb_define_method(cDocument, "scores",Document_scores, 2);
|
990
|
+
rb_define_method(cDocument, "close", Document_close, 0);
|
991
|
+
}
|