ferret 0.11.6 → 0.11.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/r_store.c
CHANGED
@@ -15,11 +15,11 @@ VALUE cFSDirectory;
|
|
15
15
|
****************************************************************************/
|
16
16
|
|
17
17
|
void
|
18
|
-
|
18
|
+
frb_unwrap_locks(Store *store)
|
19
19
|
{
|
20
|
-
|
21
|
-
for (
|
22
|
-
void *lock =
|
20
|
+
HashSetEntry *hse = store->locks->first;
|
21
|
+
for (; hse; hse = hse->next) {
|
22
|
+
void *lock = hse->elem;
|
23
23
|
VALUE rlock = object_get(lock);
|
24
24
|
if (rlock != Qnil) {
|
25
25
|
object_del(lock);
|
@@ -29,7 +29,7 @@ frt_unwrap_locks(Store *store)
|
|
29
29
|
}
|
30
30
|
|
31
31
|
void
|
32
|
-
|
32
|
+
frb_lock_free(void *p)
|
33
33
|
{
|
34
34
|
Lock *lock = (Lock *)p;
|
35
35
|
object_del(p);
|
@@ -37,10 +37,10 @@ frt_lock_free(void *p)
|
|
37
37
|
}
|
38
38
|
|
39
39
|
void
|
40
|
-
|
40
|
+
frb_lock_mark(void *p)
|
41
41
|
{
|
42
42
|
Lock *lock = (Lock *)p;
|
43
|
-
|
43
|
+
frb_gc_mark(lock->store);
|
44
44
|
}
|
45
45
|
|
46
46
|
#define GET_LOCK(lock, self) Data_Get_Struct(self, Lock, lock)
|
@@ -64,7 +64,7 @@ frt_lock_mark(void *p)
|
|
64
64
|
* Lock::LockError otherwise.
|
65
65
|
*/
|
66
66
|
static VALUE
|
67
|
-
|
67
|
+
frb_lock_obtain(int argc, VALUE *argv, VALUE self)
|
68
68
|
{
|
69
69
|
VALUE rtimeout;
|
70
70
|
int timeout = 1;
|
@@ -96,7 +96,7 @@ frt_lock_obtain(int argc, VALUE *argv, VALUE self)
|
|
96
96
|
* Lock::LockError otherwise.
|
97
97
|
*/
|
98
98
|
static VALUE
|
99
|
-
|
99
|
+
frb_lock_while_locked(int argc, VALUE *argv, VALUE self)
|
100
100
|
{
|
101
101
|
VALUE rtimeout;
|
102
102
|
int timeout = 1;
|
@@ -120,7 +120,7 @@ frt_lock_while_locked(int argc, VALUE *argv, VALUE self)
|
|
120
120
|
* Returns true if the lock has been obtained.
|
121
121
|
*/
|
122
122
|
static VALUE
|
123
|
-
|
123
|
+
frb_lock_is_locked(VALUE self)
|
124
124
|
{
|
125
125
|
Lock *lock;
|
126
126
|
GET_LOCK(lock, self);
|
@@ -135,7 +135,7 @@ frt_lock_is_locked(VALUE self)
|
|
135
135
|
* the lock.
|
136
136
|
*/
|
137
137
|
static VALUE
|
138
|
-
|
138
|
+
frb_lock_release(VALUE self)
|
139
139
|
{
|
140
140
|
Lock *lock;
|
141
141
|
GET_LOCK(lock, self);
|
@@ -150,9 +150,9 @@ frt_lock_release(VALUE self)
|
|
150
150
|
****************************************************************************/
|
151
151
|
|
152
152
|
void
|
153
|
-
|
153
|
+
frb_dir_free(Store *store)
|
154
154
|
{
|
155
|
-
|
155
|
+
frb_unwrap_locks(store);
|
156
156
|
object_del(store);
|
157
157
|
store_deref(store);
|
158
158
|
}
|
@@ -166,7 +166,7 @@ frt_dir_free(Store *store)
|
|
166
166
|
* behaviour may change in future.
|
167
167
|
*/
|
168
168
|
static VALUE
|
169
|
-
|
169
|
+
frb_dir_close(VALUE self)
|
170
170
|
{
|
171
171
|
Store *store = DATA_PTR(self);
|
172
172
|
int ref_cnt = FIX2INT(rb_ivar_get(self, id_ref_cnt)) - 1;
|
@@ -174,7 +174,7 @@ frt_dir_close(VALUE self)
|
|
174
174
|
if (ref_cnt < 0) {
|
175
175
|
Frt_Unwrap_Struct(self);
|
176
176
|
object_del(store);
|
177
|
-
|
177
|
+
frb_unwrap_locks(store);
|
178
178
|
store_deref(store);
|
179
179
|
}
|
180
180
|
return Qnil;
|
@@ -187,7 +187,7 @@ frt_dir_close(VALUE self)
|
|
187
187
|
* Return true if a file with the name +file_name+ exists in the directory.
|
188
188
|
*/
|
189
189
|
static VALUE
|
190
|
-
|
190
|
+
frb_dir_exists(VALUE self, VALUE rfname)
|
191
191
|
{
|
192
192
|
Store *store = DATA_PTR(self);
|
193
193
|
StringValue(rfname);
|
@@ -201,7 +201,7 @@ frt_dir_exists(VALUE self, VALUE rfname)
|
|
201
201
|
* Create an empty file in the directory with the name +file_name+.
|
202
202
|
*/
|
203
203
|
static VALUE
|
204
|
-
|
204
|
+
frb_dir_touch(VALUE self, VALUE rfname)
|
205
205
|
{
|
206
206
|
Store *store = DATA_PTR(self);
|
207
207
|
StringValue(rfname);
|
@@ -216,7 +216,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
|
|
216
216
|
* Remove file +file_name+ from the directory. Returns true if successful.
|
217
217
|
*/
|
218
218
|
static VALUE
|
219
|
-
|
219
|
+
frb_dir_delete(VALUE self, VALUE rfname)
|
220
220
|
{
|
221
221
|
Store *store = DATA_PTR(self);
|
222
222
|
StringValue(rfname);
|
@@ -230,7 +230,7 @@ frt_dir_delete(VALUE self, VALUE rfname)
|
|
230
230
|
* Return a count of the number of files in the directory.
|
231
231
|
*/
|
232
232
|
static VALUE
|
233
|
-
|
233
|
+
frb_dir_file_count(VALUE self)
|
234
234
|
{
|
235
235
|
Store *store = DATA_PTR(self);
|
236
236
|
return INT2FIX(store->count(store));
|
@@ -243,7 +243,7 @@ frt_dir_file_count(VALUE self)
|
|
243
243
|
* Delete all files in the directory. It gives you a clean slate.
|
244
244
|
*/
|
245
245
|
static VALUE
|
246
|
-
|
246
|
+
frb_dir_refresh(VALUE self)
|
247
247
|
{
|
248
248
|
Store *store = DATA_PTR(self);
|
249
249
|
store->clear_all(store);
|
@@ -258,7 +258,7 @@ frt_dir_refresh(VALUE self)
|
|
258
258
|
* doesn't exist or there is some other type of IOError.
|
259
259
|
*/
|
260
260
|
static VALUE
|
261
|
-
|
261
|
+
frb_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
|
262
262
|
{
|
263
263
|
Store *store = DATA_PTR(self);
|
264
264
|
StringValue(rfrom);
|
@@ -277,14 +277,14 @@ frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
|
|
277
277
|
* reserved for lock files
|
278
278
|
*/
|
279
279
|
static VALUE
|
280
|
-
|
280
|
+
frb_dir_make_lock(VALUE self, VALUE rlock_name)
|
281
281
|
{
|
282
282
|
VALUE rlock;
|
283
283
|
Lock *lock;
|
284
284
|
Store *store = DATA_PTR(self);
|
285
285
|
StringValue(rlock_name);
|
286
286
|
lock = open_lock(store, rs2s(rlock_name));
|
287
|
-
rlock = Data_Wrap_Struct(cLock, &
|
287
|
+
rlock = Data_Wrap_Struct(cLock, &frb_lock_mark, &frb_lock_free, lock);
|
288
288
|
object_add(lock, rlock);
|
289
289
|
return rlock;
|
290
290
|
}
|
@@ -308,7 +308,7 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
|
|
308
308
|
* dir:: Directory to load into memory
|
309
309
|
*/
|
310
310
|
static VALUE
|
311
|
-
|
311
|
+
frb_ramdir_init(int argc, VALUE *argv, VALUE self)
|
312
312
|
{
|
313
313
|
VALUE rdir;
|
314
314
|
Store *store;
|
@@ -321,7 +321,7 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
|
321
321
|
}
|
322
322
|
default: store = open_ram_store();
|
323
323
|
}
|
324
|
-
Frt_Wrap_Struct(self, NULL, &
|
324
|
+
Frt_Wrap_Struct(self, NULL, &frb_dir_free, store);
|
325
325
|
object_add(store, self);
|
326
326
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
327
327
|
return self;
|
@@ -348,7 +348,7 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
|
348
348
|
* deleted
|
349
349
|
*/
|
350
350
|
static VALUE
|
351
|
-
|
351
|
+
frb_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
352
352
|
{
|
353
353
|
VALUE self, rpath, rcreate;
|
354
354
|
Store *store;
|
@@ -358,7 +358,7 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
|
358
358
|
StringValue(rpath);
|
359
359
|
create = RTEST(rcreate);
|
360
360
|
if (create) {
|
361
|
-
|
361
|
+
frb_create_dir(rpath);
|
362
362
|
}
|
363
363
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
364
364
|
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
|
@@ -367,7 +367,7 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
|
367
367
|
store = open_fs_store(rs2s(rpath));
|
368
368
|
if (create) store->clear_all(store);
|
369
369
|
if ((self = object_get(store)) == Qnil) {
|
370
|
-
self = Data_Wrap_Struct(klass, NULL, &
|
370
|
+
self = Data_Wrap_Struct(klass, NULL, &frb_dir_free, store);
|
371
371
|
object_add(store, self);
|
372
372
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
373
373
|
}
|
@@ -408,14 +408,14 @@ Init_Directory(void)
|
|
408
408
|
{
|
409
409
|
cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
|
410
410
|
rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
|
411
|
-
rb_define_method(cDirectory, "close",
|
412
|
-
rb_define_method(cDirectory, "exists?",
|
413
|
-
rb_define_method(cDirectory, "touch",
|
414
|
-
rb_define_method(cDirectory, "delete",
|
415
|
-
rb_define_method(cDirectory, "file_count",
|
416
|
-
rb_define_method(cDirectory, "refresh",
|
417
|
-
rb_define_method(cDirectory, "rename",
|
418
|
-
rb_define_method(cDirectory, "make_lock",
|
411
|
+
rb_define_method(cDirectory, "close", frb_dir_close, 0);
|
412
|
+
rb_define_method(cDirectory, "exists?", frb_dir_exists, 1);
|
413
|
+
rb_define_method(cDirectory, "touch", frb_dir_touch, 1);
|
414
|
+
rb_define_method(cDirectory, "delete", frb_dir_delete, 1);
|
415
|
+
rb_define_method(cDirectory, "file_count", frb_dir_file_count, 0);
|
416
|
+
rb_define_method(cDirectory, "refresh", frb_dir_refresh, 0);
|
417
|
+
rb_define_method(cDirectory, "rename", frb_dir_rename, 2);
|
418
|
+
rb_define_method(cDirectory, "make_lock", frb_dir_make_lock, 1);
|
419
419
|
}
|
420
420
|
|
421
421
|
/*
|
@@ -445,10 +445,10 @@ void
|
|
445
445
|
Init_Lock(void)
|
446
446
|
{
|
447
447
|
cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
|
448
|
-
rb_define_method(cLock, "obtain",
|
449
|
-
rb_define_method(cLock, "while_locked",
|
450
|
-
rb_define_method(cLock, "release",
|
451
|
-
rb_define_method(cLock, "locked?",
|
448
|
+
rb_define_method(cLock, "obtain", frb_lock_obtain, -1);
|
449
|
+
rb_define_method(cLock, "while_locked", frb_lock_while_locked, -1);
|
450
|
+
rb_define_method(cLock, "release", frb_lock_release, 0);
|
451
|
+
rb_define_method(cLock, "locked?", frb_lock_is_locked, 0);
|
452
452
|
|
453
453
|
cLockError = rb_define_class_under(cLock, "LockError", rb_eStandardError);
|
454
454
|
}
|
@@ -466,8 +466,8 @@ void
|
|
466
466
|
Init_RAMDirectory(void)
|
467
467
|
{
|
468
468
|
cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
|
469
|
-
rb_define_alloc_func(cRAMDirectory,
|
470
|
-
rb_define_method(cRAMDirectory, "initialize",
|
469
|
+
rb_define_alloc_func(cRAMDirectory, frb_data_alloc);
|
470
|
+
rb_define_method(cRAMDirectory, "initialize", frb_ramdir_init, -1);
|
471
471
|
}
|
472
472
|
|
473
473
|
/*
|
@@ -483,8 +483,8 @@ void
|
|
483
483
|
Init_FSDirectory(void)
|
484
484
|
{
|
485
485
|
cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
|
486
|
-
rb_define_alloc_func(cFSDirectory,
|
487
|
-
rb_define_singleton_method(cFSDirectory, "new",
|
486
|
+
rb_define_alloc_func(cFSDirectory, frb_data_alloc);
|
487
|
+
rb_define_singleton_method(cFSDirectory, "new", frb_fsdir_new, -1);
|
488
488
|
}
|
489
489
|
|
490
490
|
/* rdoc hack
|
data/ext/r_utils.c
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
#include "ferret.h"
|
2
2
|
#include "bitvector.h"
|
3
|
+
#include "multimapper.h"
|
4
|
+
#ifdef FRT_RUBY_VERSION_1_9
|
5
|
+
# include <ruby/st.h>
|
6
|
+
#else
|
7
|
+
# include <st.h>
|
8
|
+
#endif
|
3
9
|
|
4
10
|
/*****************
|
5
11
|
*** BitVector ***
|
@@ -7,17 +13,17 @@
|
|
7
13
|
static VALUE cBitVector;
|
8
14
|
|
9
15
|
static void
|
10
|
-
|
16
|
+
frb_bv_free(void *p)
|
11
17
|
{
|
12
18
|
object_del(p);
|
13
19
|
bv_destroy((BitVector *)p);
|
14
20
|
}
|
15
21
|
|
16
22
|
static VALUE
|
17
|
-
|
23
|
+
frb_bv_alloc(VALUE klass)
|
18
24
|
{
|
19
25
|
BitVector *bv = bv_new();
|
20
|
-
VALUE rbv = Data_Wrap_Struct(klass, NULL, &
|
26
|
+
VALUE rbv = Data_Wrap_Struct(klass, NULL, &frb_bv_free, bv);
|
21
27
|
object_add(bv, rbv);
|
22
28
|
return rbv;
|
23
29
|
}
|
@@ -25,11 +31,11 @@ frt_bv_alloc(VALUE klass)
|
|
25
31
|
#define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
|
26
32
|
|
27
33
|
VALUE
|
28
|
-
|
34
|
+
frb_get_bv(BitVector *bv)
|
29
35
|
{
|
30
36
|
VALUE rbv;
|
31
37
|
if ((rbv = object_get(bv)) == Qnil) {
|
32
|
-
rbv = Data_Wrap_Struct(cBitVector, NULL, &
|
38
|
+
rbv = Data_Wrap_Struct(cBitVector, NULL, &frb_bv_free, bv);
|
33
39
|
REF(bv);
|
34
40
|
object_add(bv, rbv);
|
35
41
|
}
|
@@ -38,12 +44,12 @@ frt_get_bv(BitVector *bv)
|
|
38
44
|
|
39
45
|
/*
|
40
46
|
* call-seq:
|
41
|
-
* BitVector.new() ->
|
47
|
+
* BitVector.new() -> new_bit_vector
|
42
48
|
*
|
43
49
|
* Returns a new empty bit vector object
|
44
50
|
*/
|
45
51
|
static VALUE
|
46
|
-
|
52
|
+
frb_bv_init(VALUE self)
|
47
53
|
{
|
48
54
|
return self;
|
49
55
|
}
|
@@ -56,7 +62,7 @@ frt_bv_init(VALUE self)
|
|
56
62
|
* +false+).
|
57
63
|
*/
|
58
64
|
VALUE
|
59
|
-
|
65
|
+
frb_bv_set(VALUE self, VALUE rindex, VALUE rstate)
|
60
66
|
{
|
61
67
|
BitVector *bv;
|
62
68
|
int index = FIX2INT(rindex);
|
@@ -81,9 +87,9 @@ frt_bv_set(VALUE self, VALUE rindex, VALUE rstate)
|
|
81
87
|
* Set the bit at _i_ to *on* (+true+)
|
82
88
|
*/
|
83
89
|
VALUE
|
84
|
-
|
90
|
+
frb_bv_set_on(VALUE self, VALUE rindex)
|
85
91
|
{
|
86
|
-
|
92
|
+
frb_bv_set(self, rindex, Qtrue);
|
87
93
|
return self;
|
88
94
|
}
|
89
95
|
|
@@ -94,9 +100,9 @@ frt_bv_set_on(VALUE self, VALUE rindex)
|
|
94
100
|
* Set the bit at _i_ to *off* (+false+)
|
95
101
|
*/
|
96
102
|
VALUE
|
97
|
-
|
103
|
+
frb_bv_set_off(VALUE self, VALUE rindex)
|
98
104
|
{
|
99
|
-
|
105
|
+
frb_bv_set(self, rindex, Qfalse);
|
100
106
|
return self;
|
101
107
|
}
|
102
108
|
|
@@ -108,7 +114,7 @@ frt_bv_set_off(VALUE self, VALUE rindex)
|
|
108
114
|
* Get the bit value at _i_
|
109
115
|
*/
|
110
116
|
VALUE
|
111
|
-
|
117
|
+
frb_bv_get(VALUE self, VALUE rindex)
|
112
118
|
{
|
113
119
|
BitVector *bv;
|
114
120
|
int index = FIX2INT(rindex);
|
@@ -129,7 +135,7 @@ frt_bv_get(VALUE self, VALUE rindex)
|
|
129
135
|
* instead.
|
130
136
|
*/
|
131
137
|
VALUE
|
132
|
-
|
138
|
+
frb_bv_count(VALUE self)
|
133
139
|
{
|
134
140
|
BitVector *bv;
|
135
141
|
GET_BV(bv, self);
|
@@ -144,7 +150,7 @@ frt_bv_count(VALUE self)
|
|
144
150
|
* all bits set to *off*.
|
145
151
|
*/
|
146
152
|
VALUE
|
147
|
-
|
153
|
+
frb_bv_clear(VALUE self)
|
148
154
|
{
|
149
155
|
BitVector *bv;
|
150
156
|
GET_BV(bv, self);
|
@@ -163,7 +169,7 @@ frt_bv_clear(VALUE self)
|
|
163
169
|
* bits set.
|
164
170
|
*/
|
165
171
|
VALUE
|
166
|
-
|
172
|
+
frb_bv_eql(VALUE self, VALUE other)
|
167
173
|
{
|
168
174
|
BitVector *bv1, *bv2;
|
169
175
|
GET_BV(bv1, self);
|
@@ -179,7 +185,7 @@ frt_bv_eql(VALUE self, VALUE other)
|
|
179
185
|
* cache them.
|
180
186
|
*/
|
181
187
|
VALUE
|
182
|
-
|
188
|
+
frb_bv_hash(VALUE self)
|
183
189
|
{
|
184
190
|
BitVector *bv;
|
185
191
|
GET_BV(bv, self);
|
@@ -195,7 +201,7 @@ frt_bv_hash(VALUE self)
|
|
195
201
|
* +bv2+
|
196
202
|
*/
|
197
203
|
VALUE
|
198
|
-
|
204
|
+
frb_bv_and(VALUE self, VALUE other)
|
199
205
|
{
|
200
206
|
BitVector *bv1, *bv2;
|
201
207
|
GET_BV(bv1, self);
|
@@ -211,7 +217,7 @@ frt_bv_and(VALUE self, VALUE other)
|
|
211
217
|
* +bv2+ in place on +bv1+
|
212
218
|
*/
|
213
219
|
VALUE
|
214
|
-
|
220
|
+
frb_bv_and_x(VALUE self, VALUE other)
|
215
221
|
{
|
216
222
|
BitVector *bv1, *bv2;
|
217
223
|
GET_BV(bv1, self);
|
@@ -229,7 +235,7 @@ frt_bv_and_x(VALUE self, VALUE other)
|
|
229
235
|
* +bv2+
|
230
236
|
*/
|
231
237
|
VALUE
|
232
|
-
|
238
|
+
frb_bv_or(VALUE self, VALUE other)
|
233
239
|
{
|
234
240
|
BitVector *bv1, *bv2;
|
235
241
|
GET_BV(bv1, self);
|
@@ -245,7 +251,7 @@ frt_bv_or(VALUE self, VALUE other)
|
|
245
251
|
* +bv2+ in place on +bv1+
|
246
252
|
*/
|
247
253
|
VALUE
|
248
|
-
|
254
|
+
frb_bv_or_x(VALUE self, VALUE other)
|
249
255
|
{
|
250
256
|
BitVector *bv1, *bv2;
|
251
257
|
GET_BV(bv1, self);
|
@@ -263,7 +269,7 @@ frt_bv_or_x(VALUE self, VALUE other)
|
|
263
269
|
* +bv2+
|
264
270
|
*/
|
265
271
|
VALUE
|
266
|
-
|
272
|
+
frb_bv_xor(VALUE self, VALUE other)
|
267
273
|
{
|
268
274
|
BitVector *bv1, *bv2;
|
269
275
|
GET_BV(bv1, self);
|
@@ -279,7 +285,7 @@ frt_bv_xor(VALUE self, VALUE other)
|
|
279
285
|
* +bv2+ in place on +bv1+
|
280
286
|
*/
|
281
287
|
VALUE
|
282
|
-
|
288
|
+
frb_bv_xor_x(VALUE self, VALUE other)
|
283
289
|
{
|
284
290
|
BitVector *bv1, *bv2;
|
285
291
|
GET_BV(bv1, self);
|
@@ -296,7 +302,7 @@ frt_bv_xor_x(VALUE self, VALUE other)
|
|
296
302
|
* Perform a boolean _not_ operation on +bv+
|
297
303
|
* */
|
298
304
|
VALUE
|
299
|
-
|
305
|
+
frb_bv_not(VALUE self)
|
300
306
|
{
|
301
307
|
BitVector *bv;
|
302
308
|
GET_BV(bv, self);
|
@@ -310,7 +316,7 @@ frt_bv_not(VALUE self)
|
|
310
316
|
* Perform a boolean _not_ operation on +bv+ in-place
|
311
317
|
*/
|
312
318
|
VALUE
|
313
|
-
|
319
|
+
frb_bv_not_x(VALUE self)
|
314
320
|
{
|
315
321
|
BitVector *bv;
|
316
322
|
GET_BV(bv, self);
|
@@ -327,7 +333,7 @@ frt_bv_not_x(VALUE self)
|
|
327
333
|
* necessary for the other scan methods or for the +#each+ method.
|
328
334
|
*/
|
329
335
|
VALUE
|
330
|
-
|
336
|
+
frb_bv_reset_scan(VALUE self)
|
331
337
|
{
|
332
338
|
BitVector *bv;
|
333
339
|
GET_BV(bv, self);
|
@@ -345,7 +351,7 @@ frt_bv_reset_scan(VALUE self)
|
|
345
351
|
* first create the bit vector.
|
346
352
|
*/
|
347
353
|
VALUE
|
348
|
-
|
354
|
+
frb_bv_next(VALUE self)
|
349
355
|
{
|
350
356
|
BitVector *bv;
|
351
357
|
GET_BV(bv, self);
|
@@ -363,7 +369,7 @@ frt_bv_next(VALUE self)
|
|
363
369
|
* automatically reset when you first create the bit vector.
|
364
370
|
*/
|
365
371
|
VALUE
|
366
|
-
|
372
|
+
frb_bv_next_unset(VALUE self)
|
367
373
|
{
|
368
374
|
BitVector *bv;
|
369
375
|
GET_BV(bv, self);
|
@@ -381,7 +387,7 @@ frt_bv_next_unset(VALUE self)
|
|
381
387
|
* use the +#next_unset_from+ method.
|
382
388
|
*/
|
383
389
|
VALUE
|
384
|
-
|
390
|
+
frb_bv_next_from(VALUE self, VALUE rfrom)
|
385
391
|
{
|
386
392
|
BitVector *bv;
|
387
393
|
int from = FIX2INT(rfrom);
|
@@ -403,7 +409,7 @@ frt_bv_next_from(VALUE self, VALUE rfrom)
|
|
403
409
|
* should use the +#next_from+ method.
|
404
410
|
*/
|
405
411
|
VALUE
|
406
|
-
|
412
|
+
frb_bv_next_unset_from(VALUE self, VALUE rfrom)
|
407
413
|
{
|
408
414
|
BitVector *bv;
|
409
415
|
int from = FIX2INT(rfrom);
|
@@ -422,7 +428,7 @@ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
|
|
422
428
|
* order
|
423
429
|
*/
|
424
430
|
VALUE
|
425
|
-
|
431
|
+
frb_bv_each(VALUE self)
|
426
432
|
{
|
427
433
|
BitVector *bv;
|
428
434
|
int bit;
|
@@ -453,7 +459,7 @@ frt_bv_each(VALUE self)
|
|
453
459
|
* bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
|
454
460
|
*/
|
455
461
|
VALUE
|
456
|
-
|
462
|
+
frb_bv_to_a(VALUE self)
|
457
463
|
{
|
458
464
|
BitVector *bv;
|
459
465
|
int bit;
|
@@ -527,38 +533,198 @@ Init_BitVector(void)
|
|
527
533
|
{
|
528
534
|
/* BitVector */
|
529
535
|
cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
|
530
|
-
rb_define_alloc_func(cBitVector,
|
531
|
-
|
532
|
-
rb_define_method(cBitVector, "initialize",
|
533
|
-
rb_define_method(cBitVector, "set",
|
534
|
-
rb_define_method(cBitVector, "unset",
|
535
|
-
rb_define_method(cBitVector, "[]=",
|
536
|
-
rb_define_method(cBitVector, "get",
|
537
|
-
rb_define_method(cBitVector, "[]",
|
538
|
-
rb_define_method(cBitVector, "count",
|
539
|
-
rb_define_method(cBitVector, "clear",
|
540
|
-
rb_define_method(cBitVector, "eql?",
|
541
|
-
rb_define_method(cBitVector, "==",
|
542
|
-
rb_define_method(cBitVector, "hash",
|
543
|
-
rb_define_method(cBitVector, "and!",
|
544
|
-
rb_define_method(cBitVector, "and",
|
545
|
-
rb_define_method(cBitVector, "&",
|
546
|
-
rb_define_method(cBitVector, "or!",
|
547
|
-
rb_define_method(cBitVector, "or",
|
548
|
-
rb_define_method(cBitVector, "|",
|
549
|
-
rb_define_method(cBitVector, "xor!",
|
550
|
-
rb_define_method(cBitVector, "xor",
|
551
|
-
rb_define_method(cBitVector, "^",
|
552
|
-
rb_define_method(cBitVector, "not!",
|
553
|
-
rb_define_method(cBitVector, "not",
|
554
|
-
rb_define_method(cBitVector, "~",
|
555
|
-
rb_define_method(cBitVector, "reset_scan",
|
556
|
-
rb_define_method(cBitVector, "next",
|
557
|
-
rb_define_method(cBitVector, "next_unset",
|
558
|
-
rb_define_method(cBitVector, "next_from",
|
559
|
-
rb_define_method(cBitVector, "next_unset_from",
|
560
|
-
rb_define_method(cBitVector, "each",
|
561
|
-
rb_define_method(cBitVector, "to_a",
|
536
|
+
rb_define_alloc_func(cBitVector, frb_bv_alloc);
|
537
|
+
|
538
|
+
rb_define_method(cBitVector, "initialize", frb_bv_init, 0);
|
539
|
+
rb_define_method(cBitVector, "set", frb_bv_set_on, 1);
|
540
|
+
rb_define_method(cBitVector, "unset", frb_bv_set_off, 1);
|
541
|
+
rb_define_method(cBitVector, "[]=", frb_bv_set, 2);
|
542
|
+
rb_define_method(cBitVector, "get", frb_bv_get, 1);
|
543
|
+
rb_define_method(cBitVector, "[]", frb_bv_get, 1);
|
544
|
+
rb_define_method(cBitVector, "count", frb_bv_count, 0);
|
545
|
+
rb_define_method(cBitVector, "clear", frb_bv_clear, 0);
|
546
|
+
rb_define_method(cBitVector, "eql?", frb_bv_eql, 1);
|
547
|
+
rb_define_method(cBitVector, "==", frb_bv_eql, 1);
|
548
|
+
rb_define_method(cBitVector, "hash", frb_bv_hash, 0);
|
549
|
+
rb_define_method(cBitVector, "and!", frb_bv_and_x, 1);
|
550
|
+
rb_define_method(cBitVector, "and", frb_bv_and, 1);
|
551
|
+
rb_define_method(cBitVector, "&", frb_bv_and, 1);
|
552
|
+
rb_define_method(cBitVector, "or!", frb_bv_or_x, 1);
|
553
|
+
rb_define_method(cBitVector, "or", frb_bv_or, 1);
|
554
|
+
rb_define_method(cBitVector, "|", frb_bv_or, 1);
|
555
|
+
rb_define_method(cBitVector, "xor!", frb_bv_xor_x, 1);
|
556
|
+
rb_define_method(cBitVector, "xor", frb_bv_xor, 1);
|
557
|
+
rb_define_method(cBitVector, "^", frb_bv_xor, 1);
|
558
|
+
rb_define_method(cBitVector, "not!", frb_bv_not_x, 0);
|
559
|
+
rb_define_method(cBitVector, "not", frb_bv_not, 0);
|
560
|
+
rb_define_method(cBitVector, "~", frb_bv_not, 0);
|
561
|
+
rb_define_method(cBitVector, "reset_scan", frb_bv_reset_scan, 0);
|
562
|
+
rb_define_method(cBitVector, "next", frb_bv_next, 0);
|
563
|
+
rb_define_method(cBitVector, "next_unset", frb_bv_next_unset, 0);
|
564
|
+
rb_define_method(cBitVector, "next_from", frb_bv_next_from, 1);
|
565
|
+
rb_define_method(cBitVector, "next_unset_from", frb_bv_next_unset_from, 1);
|
566
|
+
rb_define_method(cBitVector, "each", frb_bv_each, 0);
|
567
|
+
rb_define_method(cBitVector, "to_a", frb_bv_to_a, 0);
|
568
|
+
}
|
569
|
+
|
570
|
+
/*******************
|
571
|
+
*** MultiMapper ***
|
572
|
+
*******************/
|
573
|
+
static VALUE cMultiMapper;
|
574
|
+
|
575
|
+
static void
|
576
|
+
frb_mulmap_free(void *p)
|
577
|
+
{
|
578
|
+
object_del(p);
|
579
|
+
mulmap_destroy((MultiMapper *)p);
|
580
|
+
}
|
581
|
+
|
582
|
+
static VALUE
|
583
|
+
frb_mulmap_alloc(VALUE klass)
|
584
|
+
{
|
585
|
+
MultiMapper *mulmap = mulmap_new();
|
586
|
+
VALUE rmulmap = Data_Wrap_Struct(klass, NULL, &frb_mulmap_free, mulmap);
|
587
|
+
object_add(mulmap, rmulmap);
|
588
|
+
return rmulmap;
|
589
|
+
}
|
590
|
+
|
591
|
+
/* XXX: Duplication from frb_add_mapping_i in r_analysis.c */
|
592
|
+
/*
 * Add a single +from+ -> +to+ mapping to +mulmap+.
 *
 * +from+ must be a String or a Symbol (its name is used); any other type
 * raises ArgumentError. The message names MultiMapper, matching the error
 * raised for invalid mapping targets.
 */
static INLINE void frb_mulmap_add_mapping_i(MultiMapper *mulmap, VALUE from,
                                            const char *to)
{
    switch (TYPE(from)) {
        case T_STRING:
            mulmap_add_mapping(mulmap, rs2s(from), to);
            break;
        case T_SYMBOL:
            mulmap_add_mapping(mulmap, rb_id2name(SYM2ID(from)), to);
            break;
        default:
            /* rb_raise does not return */
            rb_raise(rb_eArgError,
                     "cannot map from %s with MultiMapper",
                     rs2s(rb_obj_as_string(from)));
            break;
    }
}
|
609
|
+
|
610
|
+
/* XXX: Duplication from frb_add_mappings_i in r_analysis.c */
|
611
|
+
/*
 * rb_hash_foreach callback: adds one Hash entry to the MultiMapper passed
 * through +arg+.
 *
 * key   - a String, a Symbol, or an Array of those; every element of an
 *         Array is mapped to the same target.
 * value - the mapping target, a String or a Symbol (its name is used).
 *
 * Raises ArgumentError when +value+ is neither a String nor a Symbol.
 * Always returns ST_CONTINUE so that iteration proceeds to the next entry.
 */
static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg)
{
    MultiMapper *mulmap = (MultiMapper *)arg;
    const char *to = NULL;

    if (key == Qundef) {
        return ST_CONTINUE;
    }

    switch (TYPE(value)) {
        case T_STRING:
            to = rs2s(value);
            break;
        case T_SYMBOL:
            to = rb_id2name(SYM2ID(value));
            break;
        default:
            /* report the offending target (the value), not the key;
             * rb_raise does not return */
            rb_raise(rb_eArgError,
                     "cannot map to %s with MultiMapper",
                     rs2s(rb_obj_as_string(value)));
            break;
    }

    if (TYPE(key) == T_ARRAY) {
        /* RARRAY_LEN yields a long; keep the index the same width */
        long i;
        for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
            frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
        }
    }
    else {
        frb_mulmap_add_mapping_i(mulmap, key, to);
    }
    return ST_CONTINUE;
}
|
643
|
+
|
644
|
+
/*
|
645
|
+
* call-seq:
|
646
|
+
* MultiMapper.new() -> new_multi_mapper
|
647
|
+
*
|
648
|
+
* Returns a new multi-mapper object and compiles it for optimization.
|
649
|
+
*
|
650
|
+
* Note that MultiMapper is immutable.
|
651
|
+
*/
|
652
|
+
static VALUE
|
653
|
+
frb_mulmap_init(VALUE self, VALUE rmappings)
|
654
|
+
{
|
655
|
+
MultiMapper *mulmap = DATA_PTR(self);
|
656
|
+
rb_hash_foreach(rmappings, frb_mulmap_add_mappings_i, (VALUE)mulmap);
|
657
|
+
mulmap_compile(mulmap);
|
658
|
+
|
659
|
+
return self;
|
660
|
+
}
|
661
|
+
|
662
|
+
/*
|
663
|
+
* call-seq:
|
664
|
+
* multi_mapper.map(string) -> mapped_string
|
665
|
+
*
|
666
|
+
* Performs all the mappings on the string.
|
667
|
+
*/
|
668
|
+
VALUE
|
669
|
+
frb_mulmap_map(VALUE self, VALUE rstring)
|
670
|
+
{
|
671
|
+
MultiMapper *mulmap = DATA_PTR(self);
|
672
|
+
char *string = rs2s(rb_obj_as_string(rstring));
|
673
|
+
char *mapped_string = mulmap_dynamic_map(mulmap, string);
|
674
|
+
VALUE rmapped_string = rb_str_new2(mapped_string);
|
675
|
+
free(mapped_string);
|
676
|
+
return rmapped_string;
|
677
|
+
}
|
678
|
+
|
679
|
+
/*
|
680
|
+
* Document-class: Ferret::Utils::MultiMapper
|
681
|
+
*
|
682
|
+
* == Summary
|
683
|
+
*
|
684
|
+
* A MultiMapper performs a list of mappings from one string to another. You
|
685
|
+
* could of course just use gsub to do this but when you are just mapping
|
686
|
+
* strings, this is much faster.
|
687
|
+
*
|
688
|
+
* Note that MultiMapper is immutable.
|
689
|
+
*
|
690
|
+
* == Example
|
691
|
+
*
|
692
|
+
* mapping = {
|
693
|
+
* ['à','á','â','ã','ä','å','ā','ă'] => 'a',
|
694
|
+
* 'æ' => 'ae',
|
695
|
+
* ['ď','đ'] => 'd',
|
696
|
+
* ['ç','ć','č','ĉ','ċ'] => 'c',
|
697
|
+
* ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e',
|
698
|
+
* ['ƒ'] => 'f',
|
699
|
+
* ['ĝ','ğ','ġ','ģ'] => 'g',
|
700
|
+
* ['ĥ','ħ'] => 'h',
|
701
|
+
* ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i',
|
702
|
+
* ['į','ı','ĳ','ĵ'] => 'j',
|
703
|
+
* ['ķ','ĸ'] => 'k',
|
704
|
+
* ['ł','ľ','ĺ','ļ','ŀ'] => 'l',
|
705
|
+
* ['ñ','ń','ň','ņ','ŉ','ŋ'] => 'n',
|
706
|
+
* ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
|
707
|
+
* ['œ'] => 'oe',
|
708
|
+
* ['ą'] => 'q',
|
709
|
+
* ['ŕ','ř','ŗ'] => 'r',
|
710
|
+
* ['ś','š','ş','ŝ','ș'] => 's',
|
711
|
+
* ['ť','ţ','ŧ','ț'] => 't',
|
712
|
+
* ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
|
713
|
+
* ['ŵ'] => 'w',
|
714
|
+
* ['ý','ÿ','ŷ'] => 'y',
|
715
|
+
* ['ž','ż','ź'] => 'z' }
|
716
|
+
* mapper = MultiMapper.new(mapping)
|
717
|
+
* mapped_string = mapper.map(string)
|
718
|
+
*/
|
719
|
+
static void
|
720
|
+
Init_MultiMapper(void)
|
721
|
+
{
|
722
|
+
/* MultiMapper */
|
723
|
+
cMultiMapper = rb_define_class_under(mUtils, "MultiMapper", rb_cObject);
|
724
|
+
rb_define_alloc_func(cMultiMapper, frb_mulmap_alloc);
|
725
|
+
|
726
|
+
rb_define_method(cMultiMapper, "initialize", frb_mulmap_init, 1);
|
727
|
+
rb_define_method(cMultiMapper, "map", frb_mulmap_map, 1);
|
562
728
|
}
|
563
729
|
|
564
730
|
/*********************
|
@@ -575,7 +741,7 @@ typedef struct PriQ
|
|
575
741
|
|
576
742
|
#define PQ_START_CAPA 32
|
577
743
|
|
578
|
-
static bool
|
744
|
+
static bool frb_pq_lt(VALUE proc, VALUE v1, VALUE v2)
|
579
745
|
{
|
580
746
|
if (proc == Qnil) {
|
581
747
|
return RTEST(rb_funcall(v1, id_lt, 1, v2));
|
@@ -594,7 +760,7 @@ static void pq_up(PriQ *pq)
|
|
594
760
|
|
595
761
|
node = heap[i];
|
596
762
|
|
597
|
-
while ((j > 0) &&
|
763
|
+
while ((j > 0) && frb_pq_lt(pq->proc, node, heap[j])) {
|
598
764
|
heap[i] = heap[j];
|
599
765
|
i = j;
|
600
766
|
j = j >> 1;
|
@@ -611,16 +777,16 @@ static void pq_down(PriQ *pq)
|
|
611
777
|
VALUE *heap = pq->heap;
|
612
778
|
VALUE node = heap[i]; /* save top node */
|
613
779
|
|
614
|
-
if ((k <= size) && (
|
780
|
+
if ((k <= size) && (frb_pq_lt(pq->proc, heap[k], heap[j]))) {
|
615
781
|
j = k;
|
616
782
|
}
|
617
783
|
|
618
|
-
while ((j <= size) &&
|
784
|
+
while ((j <= size) && frb_pq_lt(pq->proc, heap[j], node)) {
|
619
785
|
heap[i] = heap[j]; /* shift up child */
|
620
786
|
i = j;
|
621
787
|
j = i << 1;
|
622
788
|
k = j + 1;
|
623
|
-
if ((k <= size) &&
|
789
|
+
if ((k <= size) && frb_pq_lt(pq->proc, heap[k], heap[j])) {
|
624
790
|
j = k;
|
625
791
|
}
|
626
792
|
}
|
@@ -641,7 +807,7 @@ static void pq_push(PriQ *pq, VALUE elem)
|
|
641
807
|
static VALUE cPriorityQueue;
|
642
808
|
|
643
809
|
static void
|
644
|
-
|
810
|
+
frb_pq_mark(void *p)
|
645
811
|
{
|
646
812
|
PriQ *pq = (PriQ *)p;
|
647
813
|
int i;
|
@@ -650,21 +816,21 @@ frt_pq_mark(void *p)
|
|
650
816
|
}
|
651
817
|
}
|
652
818
|
|
653
|
-
static void
|
819
|
+
static void frb_pq_free(PriQ *pq)
|
654
820
|
{
|
655
821
|
free(pq->heap);
|
656
822
|
free(pq);
|
657
823
|
}
|
658
824
|
|
659
825
|
static VALUE
|
660
|
-
|
826
|
+
frb_pq_alloc(VALUE klass)
|
661
827
|
{
|
662
828
|
PriQ *pq = ALLOC_AND_ZERO(PriQ);
|
663
829
|
pq->capa = PQ_START_CAPA;
|
664
830
|
pq->mem_capa = PQ_START_CAPA;
|
665
831
|
pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
|
666
832
|
pq->proc = Qnil;
|
667
|
-
return Data_Wrap_Struct(klass, &
|
833
|
+
return Data_Wrap_Struct(klass, &frb_pq_mark, &frb_pq_free, pq);
|
668
834
|
}
|
669
835
|
|
670
836
|
#define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
|
@@ -681,7 +847,7 @@ frt_pq_alloc(VALUE klass)
|
|
681
847
|
* inserted into the queue.
|
682
848
|
*/
|
683
849
|
static VALUE
|
684
|
-
|
850
|
+
frb_pq_init(int argc, VALUE *argv, VALUE self)
|
685
851
|
{
|
686
852
|
if (argc >= 1) {
|
687
853
|
PriQ *pq;
|
@@ -736,7 +902,7 @@ frt_pq_init(int argc, VALUE *argv, VALUE self)
|
|
736
902
|
* queue is cloned, its contents are not cloned.
|
737
903
|
*/
|
738
904
|
static VALUE
|
739
|
-
|
905
|
+
frb_pq_clone(VALUE self)
|
740
906
|
{
|
741
907
|
PriQ *pq, *new_pq = ALLOC(PriQ);
|
742
908
|
GET_PQ(pq, self);
|
@@ -744,7 +910,7 @@ frt_pq_clone(VALUE self)
|
|
744
910
|
new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
|
745
911
|
memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
|
746
912
|
|
747
|
-
return Data_Wrap_Struct(cPriorityQueue, &
|
913
|
+
return Data_Wrap_Struct(cPriorityQueue, &frb_pq_mark, &frb_pq_free, new_pq);
|
748
914
|
}
|
749
915
|
|
750
916
|
/*
|
@@ -754,7 +920,7 @@ frt_pq_clone(VALUE self)
|
|
754
920
|
* Clears all elements from the priority queue. The size will be reset to 0.
|
755
921
|
*/
|
756
922
|
static VALUE
|
757
|
-
|
923
|
+
frb_pq_clear(VALUE self)
|
758
924
|
{
|
759
925
|
PriQ *pq;
|
760
926
|
GET_PQ(pq, self);
|
@@ -771,14 +937,14 @@ frt_pq_clear(VALUE self)
|
|
771
937
|
* position in the queue according to its priority.
|
772
938
|
*/
|
773
939
|
static VALUE
|
774
|
-
|
940
|
+
frb_pq_insert(VALUE self, VALUE elem)
|
775
941
|
{
|
776
942
|
PriQ *pq;
|
777
943
|
GET_PQ(pq, self);
|
778
944
|
if (pq->size < pq->capa) {
|
779
945
|
pq_push(pq, elem);
|
780
946
|
}
|
781
|
-
else if (pq->size > 0 &&
|
947
|
+
else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
|
782
948
|
pq->heap[1] = elem;
|
783
949
|
pq_down(pq);
|
784
950
|
}
|
@@ -795,7 +961,7 @@ frt_pq_insert(VALUE self, VALUE elem)
|
|
795
961
|
* do this by calling the adjust method.
|
796
962
|
*/
|
797
963
|
static VALUE
|
798
|
-
|
964
|
+
frb_pq_adjust(VALUE self)
|
799
965
|
{
|
800
966
|
PriQ *pq;
|
801
967
|
GET_PQ(pq, self);
|
@@ -811,7 +977,7 @@ frt_pq_adjust(VALUE self)
|
|
811
977
|
* queue.
|
812
978
|
*/
|
813
979
|
static VALUE
|
814
|
-
|
980
|
+
frb_pq_top(VALUE self)
|
815
981
|
{
|
816
982
|
PriQ *pq;
|
817
983
|
GET_PQ(pq, self);
|
@@ -825,7 +991,7 @@ frt_pq_top(VALUE self)
|
|
825
991
|
* Returns the top element in the queue removing it from the queue.
|
826
992
|
*/
|
827
993
|
static VALUE
|
828
|
-
|
994
|
+
frb_pq_pop(VALUE self)
|
829
995
|
{
|
830
996
|
PriQ *pq;
|
831
997
|
GET_PQ(pq, self);
|
@@ -851,7 +1017,7 @@ frt_pq_pop(VALUE self)
|
|
851
1017
|
* its _capacity_
|
852
1018
|
*/
|
853
1019
|
static VALUE
|
854
|
-
|
1020
|
+
frb_pq_size(VALUE self)
|
855
1021
|
{
|
856
1022
|
PriQ *pq;
|
857
1023
|
GET_PQ(pq, self);
|
@@ -868,7 +1034,7 @@ frt_pq_size(VALUE self)
|
|
868
1034
|
* _capacity_
|
869
1035
|
*/
|
870
1036
|
static VALUE
|
871
|
-
|
1037
|
+
frb_pq_capa(VALUE self)
|
872
1038
|
{
|
873
1039
|
PriQ *pq;
|
874
1040
|
GET_PQ(pq, self);
|
@@ -922,18 +1088,18 @@ Init_PriorityQueue(void)
|
|
922
1088
|
{
|
923
1089
|
/* PriorityQueue */
|
924
1090
|
cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
|
925
|
-
rb_define_alloc_func(cPriorityQueue,
|
926
|
-
|
927
|
-
rb_define_method(cPriorityQueue, "initialize",
|
928
|
-
rb_define_method(cPriorityQueue, "clone",
|
929
|
-
rb_define_method(cPriorityQueue, "clear",
|
930
|
-
rb_define_method(cPriorityQueue, "insert",
|
931
|
-
rb_define_method(cPriorityQueue, "<<",
|
932
|
-
rb_define_method(cPriorityQueue, "top",
|
933
|
-
rb_define_method(cPriorityQueue, "pop",
|
934
|
-
rb_define_method(cPriorityQueue, "size",
|
935
|
-
rb_define_method(cPriorityQueue, "capacity",
|
936
|
-
rb_define_method(cPriorityQueue, "adjust",
|
1091
|
+
rb_define_alloc_func(cPriorityQueue, frb_pq_alloc);
|
1092
|
+
|
1093
|
+
rb_define_method(cPriorityQueue, "initialize", frb_pq_init, -1);
|
1094
|
+
rb_define_method(cPriorityQueue, "clone", frb_pq_clone, 0);
|
1095
|
+
rb_define_method(cPriorityQueue, "clear", frb_pq_clear, 0);
|
1096
|
+
rb_define_method(cPriorityQueue, "insert", frb_pq_insert, 1);
|
1097
|
+
rb_define_method(cPriorityQueue, "<<", frb_pq_insert, 1);
|
1098
|
+
rb_define_method(cPriorityQueue, "top", frb_pq_top, 0);
|
1099
|
+
rb_define_method(cPriorityQueue, "pop", frb_pq_pop, 0);
|
1100
|
+
rb_define_method(cPriorityQueue, "size", frb_pq_size, 0);
|
1101
|
+
rb_define_method(cPriorityQueue, "capacity", frb_pq_capa, 0);
|
1102
|
+
rb_define_method(cPriorityQueue, "adjust", frb_pq_adjust, 0);
|
937
1103
|
}
|
938
1104
|
|
939
1105
|
/* rdoc hack
|
@@ -947,6 +1113,7 @@ extern VALUE mFerret = rb_define_module("Ferret");
|
|
947
1113
|
* useful when indexing with Ferret. They are;
|
948
1114
|
*
|
949
1115
|
* * BitVector
|
1116
|
+
* * MultiMapper
|
950
1117
|
* * PriorityQueue
|
951
1118
|
* * => more to come
|
952
1119
|
*
|
@@ -959,5 +1126,6 @@ Init_Utils(void)
|
|
959
1126
|
mUtils = rb_define_module_under(mFerret, "Utils");
|
960
1127
|
|
961
1128
|
Init_BitVector();
|
1129
|
+
Init_MultiMapper();
|
962
1130
|
Init_PriorityQueue();
|
963
1131
|
}
|