ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/r_store.c
CHANGED
@@ -15,11 +15,11 @@ VALUE cFSDirectory;
|
|
15
15
|
****************************************************************************/
|
16
16
|
|
17
17
|
void
|
18
|
-
|
18
|
+
frb_unwrap_locks(Store *store)
|
19
19
|
{
|
20
|
-
|
21
|
-
for (
|
22
|
-
void *lock =
|
20
|
+
HashSetEntry *hse = store->locks->first;
|
21
|
+
for (; hse; hse = hse->next) {
|
22
|
+
void *lock = hse->elem;
|
23
23
|
VALUE rlock = object_get(lock);
|
24
24
|
if (rlock != Qnil) {
|
25
25
|
object_del(lock);
|
@@ -29,7 +29,7 @@ frt_unwrap_locks(Store *store)
|
|
29
29
|
}
|
30
30
|
|
31
31
|
void
|
32
|
-
|
32
|
+
frb_lock_free(void *p)
|
33
33
|
{
|
34
34
|
Lock *lock = (Lock *)p;
|
35
35
|
object_del(p);
|
@@ -37,10 +37,10 @@ frt_lock_free(void *p)
|
|
37
37
|
}
|
38
38
|
|
39
39
|
void
|
40
|
-
|
40
|
+
frb_lock_mark(void *p)
|
41
41
|
{
|
42
42
|
Lock *lock = (Lock *)p;
|
43
|
-
|
43
|
+
frb_gc_mark(lock->store);
|
44
44
|
}
|
45
45
|
|
46
46
|
#define GET_LOCK(lock, self) Data_Get_Struct(self, Lock, lock)
|
@@ -64,7 +64,7 @@ frt_lock_mark(void *p)
|
|
64
64
|
* Lock::LockError otherwise.
|
65
65
|
*/
|
66
66
|
static VALUE
|
67
|
-
|
67
|
+
frb_lock_obtain(int argc, VALUE *argv, VALUE self)
|
68
68
|
{
|
69
69
|
VALUE rtimeout;
|
70
70
|
int timeout = 1;
|
@@ -96,7 +96,7 @@ frt_lock_obtain(int argc, VALUE *argv, VALUE self)
|
|
96
96
|
* Lock::LockError otherwise.
|
97
97
|
*/
|
98
98
|
static VALUE
|
99
|
-
|
99
|
+
frb_lock_while_locked(int argc, VALUE *argv, VALUE self)
|
100
100
|
{
|
101
101
|
VALUE rtimeout;
|
102
102
|
int timeout = 1;
|
@@ -120,7 +120,7 @@ frt_lock_while_locked(int argc, VALUE *argv, VALUE self)
|
|
120
120
|
* Returns true if the lock has been obtained.
|
121
121
|
*/
|
122
122
|
static VALUE
|
123
|
-
|
123
|
+
frb_lock_is_locked(VALUE self)
|
124
124
|
{
|
125
125
|
Lock *lock;
|
126
126
|
GET_LOCK(lock, self);
|
@@ -135,7 +135,7 @@ frt_lock_is_locked(VALUE self)
|
|
135
135
|
* the lock.
|
136
136
|
*/
|
137
137
|
static VALUE
|
138
|
-
|
138
|
+
frb_lock_release(VALUE self)
|
139
139
|
{
|
140
140
|
Lock *lock;
|
141
141
|
GET_LOCK(lock, self);
|
@@ -150,9 +150,9 @@ frt_lock_release(VALUE self)
|
|
150
150
|
****************************************************************************/
|
151
151
|
|
152
152
|
void
|
153
|
-
|
153
|
+
frb_dir_free(Store *store)
|
154
154
|
{
|
155
|
-
|
155
|
+
frb_unwrap_locks(store);
|
156
156
|
object_del(store);
|
157
157
|
store_deref(store);
|
158
158
|
}
|
@@ -166,7 +166,7 @@ frt_dir_free(Store *store)
|
|
166
166
|
* behaviour may change in future.
|
167
167
|
*/
|
168
168
|
static VALUE
|
169
|
-
|
169
|
+
frb_dir_close(VALUE self)
|
170
170
|
{
|
171
171
|
Store *store = DATA_PTR(self);
|
172
172
|
int ref_cnt = FIX2INT(rb_ivar_get(self, id_ref_cnt)) - 1;
|
@@ -174,7 +174,7 @@ frt_dir_close(VALUE self)
|
|
174
174
|
if (ref_cnt < 0) {
|
175
175
|
Frt_Unwrap_Struct(self);
|
176
176
|
object_del(store);
|
177
|
-
|
177
|
+
frb_unwrap_locks(store);
|
178
178
|
store_deref(store);
|
179
179
|
}
|
180
180
|
return Qnil;
|
@@ -187,7 +187,7 @@ frt_dir_close(VALUE self)
|
|
187
187
|
* Return true if a file with the name +file_name+ exists in the directory.
|
188
188
|
*/
|
189
189
|
static VALUE
|
190
|
-
|
190
|
+
frb_dir_exists(VALUE self, VALUE rfname)
|
191
191
|
{
|
192
192
|
Store *store = DATA_PTR(self);
|
193
193
|
StringValue(rfname);
|
@@ -201,7 +201,7 @@ frt_dir_exists(VALUE self, VALUE rfname)
|
|
201
201
|
* Create an empty file in the directory with the name +file_name+.
|
202
202
|
*/
|
203
203
|
static VALUE
|
204
|
-
|
204
|
+
frb_dir_touch(VALUE self, VALUE rfname)
|
205
205
|
{
|
206
206
|
Store *store = DATA_PTR(self);
|
207
207
|
StringValue(rfname);
|
@@ -216,7 +216,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
|
|
216
216
|
* Remove file +file_name+ from the directory. Returns true if successful.
|
217
217
|
*/
|
218
218
|
static VALUE
|
219
|
-
|
219
|
+
frb_dir_delete(VALUE self, VALUE rfname)
|
220
220
|
{
|
221
221
|
Store *store = DATA_PTR(self);
|
222
222
|
StringValue(rfname);
|
@@ -230,7 +230,7 @@ frt_dir_delete(VALUE self, VALUE rfname)
|
|
230
230
|
* Return a count of the number of files in the directory.
|
231
231
|
*/
|
232
232
|
static VALUE
|
233
|
-
|
233
|
+
frb_dir_file_count(VALUE self)
|
234
234
|
{
|
235
235
|
Store *store = DATA_PTR(self);
|
236
236
|
return INT2FIX(store->count(store));
|
@@ -243,7 +243,7 @@ frt_dir_file_count(VALUE self)
|
|
243
243
|
* Delete all files in the directory. It gives you a clean slate.
|
244
244
|
*/
|
245
245
|
static VALUE
|
246
|
-
|
246
|
+
frb_dir_refresh(VALUE self)
|
247
247
|
{
|
248
248
|
Store *store = DATA_PTR(self);
|
249
249
|
store->clear_all(store);
|
@@ -258,7 +258,7 @@ frt_dir_refresh(VALUE self)
|
|
258
258
|
* doesn't exist or there is some other type of IOError.
|
259
259
|
*/
|
260
260
|
static VALUE
|
261
|
-
|
261
|
+
frb_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
|
262
262
|
{
|
263
263
|
Store *store = DATA_PTR(self);
|
264
264
|
StringValue(rfrom);
|
@@ -277,14 +277,14 @@ frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
|
|
277
277
|
* reserved for lock files
|
278
278
|
*/
|
279
279
|
static VALUE
|
280
|
-
|
280
|
+
frb_dir_make_lock(VALUE self, VALUE rlock_name)
|
281
281
|
{
|
282
282
|
VALUE rlock;
|
283
283
|
Lock *lock;
|
284
284
|
Store *store = DATA_PTR(self);
|
285
285
|
StringValue(rlock_name);
|
286
286
|
lock = open_lock(store, rs2s(rlock_name));
|
287
|
-
rlock = Data_Wrap_Struct(cLock, &
|
287
|
+
rlock = Data_Wrap_Struct(cLock, &frb_lock_mark, &frb_lock_free, lock);
|
288
288
|
object_add(lock, rlock);
|
289
289
|
return rlock;
|
290
290
|
}
|
@@ -308,7 +308,7 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
|
|
308
308
|
* dir:: Directory to load into memory
|
309
309
|
*/
|
310
310
|
static VALUE
|
311
|
-
|
311
|
+
frb_ramdir_init(int argc, VALUE *argv, VALUE self)
|
312
312
|
{
|
313
313
|
VALUE rdir;
|
314
314
|
Store *store;
|
@@ -321,7 +321,7 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
|
321
321
|
}
|
322
322
|
default: store = open_ram_store();
|
323
323
|
}
|
324
|
-
Frt_Wrap_Struct(self, NULL, &
|
324
|
+
Frt_Wrap_Struct(self, NULL, &frb_dir_free, store);
|
325
325
|
object_add(store, self);
|
326
326
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
327
327
|
return self;
|
@@ -348,7 +348,7 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
|
348
348
|
* deleted
|
349
349
|
*/
|
350
350
|
static VALUE
|
351
|
-
|
351
|
+
frb_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
352
352
|
{
|
353
353
|
VALUE self, rpath, rcreate;
|
354
354
|
Store *store;
|
@@ -358,7 +358,7 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
|
358
358
|
StringValue(rpath);
|
359
359
|
create = RTEST(rcreate);
|
360
360
|
if (create) {
|
361
|
-
|
361
|
+
frb_create_dir(rpath);
|
362
362
|
}
|
363
363
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
364
364
|
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
|
@@ -367,7 +367,7 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
|
367
367
|
store = open_fs_store(rs2s(rpath));
|
368
368
|
if (create) store->clear_all(store);
|
369
369
|
if ((self = object_get(store)) == Qnil) {
|
370
|
-
self = Data_Wrap_Struct(klass, NULL, &
|
370
|
+
self = Data_Wrap_Struct(klass, NULL, &frb_dir_free, store);
|
371
371
|
object_add(store, self);
|
372
372
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
373
373
|
}
|
@@ -408,14 +408,14 @@ Init_Directory(void)
|
|
408
408
|
{
|
409
409
|
cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
|
410
410
|
rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
|
411
|
-
rb_define_method(cDirectory, "close",
|
412
|
-
rb_define_method(cDirectory, "exists?",
|
413
|
-
rb_define_method(cDirectory, "touch",
|
414
|
-
rb_define_method(cDirectory, "delete",
|
415
|
-
rb_define_method(cDirectory, "file_count",
|
416
|
-
rb_define_method(cDirectory, "refresh",
|
417
|
-
rb_define_method(cDirectory, "rename",
|
418
|
-
rb_define_method(cDirectory, "make_lock",
|
411
|
+
rb_define_method(cDirectory, "close", frb_dir_close, 0);
|
412
|
+
rb_define_method(cDirectory, "exists?", frb_dir_exists, 1);
|
413
|
+
rb_define_method(cDirectory, "touch", frb_dir_touch, 1);
|
414
|
+
rb_define_method(cDirectory, "delete", frb_dir_delete, 1);
|
415
|
+
rb_define_method(cDirectory, "file_count", frb_dir_file_count, 0);
|
416
|
+
rb_define_method(cDirectory, "refresh", frb_dir_refresh, 0);
|
417
|
+
rb_define_method(cDirectory, "rename", frb_dir_rename, 2);
|
418
|
+
rb_define_method(cDirectory, "make_lock", frb_dir_make_lock, 1);
|
419
419
|
}
|
420
420
|
|
421
421
|
/*
|
@@ -445,10 +445,10 @@ void
|
|
445
445
|
Init_Lock(void)
|
446
446
|
{
|
447
447
|
cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
|
448
|
-
rb_define_method(cLock, "obtain",
|
449
|
-
rb_define_method(cLock, "while_locked",
|
450
|
-
rb_define_method(cLock, "release",
|
451
|
-
rb_define_method(cLock, "locked?",
|
448
|
+
rb_define_method(cLock, "obtain", frb_lock_obtain, -1);
|
449
|
+
rb_define_method(cLock, "while_locked", frb_lock_while_locked, -1);
|
450
|
+
rb_define_method(cLock, "release", frb_lock_release, 0);
|
451
|
+
rb_define_method(cLock, "locked?", frb_lock_is_locked, 0);
|
452
452
|
|
453
453
|
cLockError = rb_define_class_under(cLock, "LockError", rb_eStandardError);
|
454
454
|
}
|
@@ -466,8 +466,8 @@ void
|
|
466
466
|
Init_RAMDirectory(void)
|
467
467
|
{
|
468
468
|
cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
|
469
|
-
rb_define_alloc_func(cRAMDirectory,
|
470
|
-
rb_define_method(cRAMDirectory, "initialize",
|
469
|
+
rb_define_alloc_func(cRAMDirectory, frb_data_alloc);
|
470
|
+
rb_define_method(cRAMDirectory, "initialize", frb_ramdir_init, -1);
|
471
471
|
}
|
472
472
|
|
473
473
|
/*
|
@@ -483,8 +483,8 @@ void
|
|
483
483
|
Init_FSDirectory(void)
|
484
484
|
{
|
485
485
|
cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
|
486
|
-
rb_define_alloc_func(cFSDirectory,
|
487
|
-
rb_define_singleton_method(cFSDirectory, "new",
|
486
|
+
rb_define_alloc_func(cFSDirectory, frb_data_alloc);
|
487
|
+
rb_define_singleton_method(cFSDirectory, "new", frb_fsdir_new, -1);
|
488
488
|
}
|
489
489
|
|
490
490
|
/* rdoc hack
|
data/ext/r_utils.c
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
#include "ferret.h"
|
2
2
|
#include "bitvector.h"
|
3
|
+
#include "multimapper.h"
|
4
|
+
#ifdef FRT_RUBY_VERSION_1_9
|
5
|
+
# include <ruby/st.h>
|
6
|
+
#else
|
7
|
+
# include <st.h>
|
8
|
+
#endif
|
3
9
|
|
4
10
|
/*****************
|
5
11
|
*** BitVector ***
|
@@ -7,17 +13,17 @@
|
|
7
13
|
static VALUE cBitVector;
|
8
14
|
|
9
15
|
static void
|
10
|
-
|
16
|
+
frb_bv_free(void *p)
|
11
17
|
{
|
12
18
|
object_del(p);
|
13
19
|
bv_destroy((BitVector *)p);
|
14
20
|
}
|
15
21
|
|
16
22
|
static VALUE
|
17
|
-
|
23
|
+
frb_bv_alloc(VALUE klass)
|
18
24
|
{
|
19
25
|
BitVector *bv = bv_new();
|
20
|
-
VALUE rbv = Data_Wrap_Struct(klass, NULL, &
|
26
|
+
VALUE rbv = Data_Wrap_Struct(klass, NULL, &frb_bv_free, bv);
|
21
27
|
object_add(bv, rbv);
|
22
28
|
return rbv;
|
23
29
|
}
|
@@ -25,11 +31,11 @@ frt_bv_alloc(VALUE klass)
|
|
25
31
|
#define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
|
26
32
|
|
27
33
|
VALUE
|
28
|
-
|
34
|
+
frb_get_bv(BitVector *bv)
|
29
35
|
{
|
30
36
|
VALUE rbv;
|
31
37
|
if ((rbv = object_get(bv)) == Qnil) {
|
32
|
-
rbv = Data_Wrap_Struct(cBitVector, NULL, &
|
38
|
+
rbv = Data_Wrap_Struct(cBitVector, NULL, &frb_bv_free, bv);
|
33
39
|
REF(bv);
|
34
40
|
object_add(bv, rbv);
|
35
41
|
}
|
@@ -38,12 +44,12 @@ frt_get_bv(BitVector *bv)
|
|
38
44
|
|
39
45
|
/*
|
40
46
|
* call-seq:
|
41
|
-
* BitVector.new() ->
|
47
|
+
* BitVector.new() -> new_bit_vector
|
42
48
|
*
|
43
49
|
* Returns a new empty bit vector object
|
44
50
|
*/
|
45
51
|
static VALUE
|
46
|
-
|
52
|
+
frb_bv_init(VALUE self)
|
47
53
|
{
|
48
54
|
return self;
|
49
55
|
}
|
@@ -56,7 +62,7 @@ frt_bv_init(VALUE self)
|
|
56
62
|
* +false+).
|
57
63
|
*/
|
58
64
|
VALUE
|
59
|
-
|
65
|
+
frb_bv_set(VALUE self, VALUE rindex, VALUE rstate)
|
60
66
|
{
|
61
67
|
BitVector *bv;
|
62
68
|
int index = FIX2INT(rindex);
|
@@ -81,9 +87,9 @@ frt_bv_set(VALUE self, VALUE rindex, VALUE rstate)
|
|
81
87
|
* Set the bit at _i_ to *on* (+true+)
|
82
88
|
*/
|
83
89
|
VALUE
|
84
|
-
|
90
|
+
frb_bv_set_on(VALUE self, VALUE rindex)
|
85
91
|
{
|
86
|
-
|
92
|
+
frb_bv_set(self, rindex, Qtrue);
|
87
93
|
return self;
|
88
94
|
}
|
89
95
|
|
@@ -94,9 +100,9 @@ frt_bv_set_on(VALUE self, VALUE rindex)
|
|
94
100
|
* Set the bit at _i_ to *off* (+false+)
|
95
101
|
*/
|
96
102
|
VALUE
|
97
|
-
|
103
|
+
frb_bv_set_off(VALUE self, VALUE rindex)
|
98
104
|
{
|
99
|
-
|
105
|
+
frb_bv_set(self, rindex, Qfalse);
|
100
106
|
return self;
|
101
107
|
}
|
102
108
|
|
@@ -108,7 +114,7 @@ frt_bv_set_off(VALUE self, VALUE rindex)
|
|
108
114
|
* Get the bit value at _i_
|
109
115
|
*/
|
110
116
|
VALUE
|
111
|
-
|
117
|
+
frb_bv_get(VALUE self, VALUE rindex)
|
112
118
|
{
|
113
119
|
BitVector *bv;
|
114
120
|
int index = FIX2INT(rindex);
|
@@ -129,7 +135,7 @@ frt_bv_get(VALUE self, VALUE rindex)
|
|
129
135
|
* instead.
|
130
136
|
*/
|
131
137
|
VALUE
|
132
|
-
|
138
|
+
frb_bv_count(VALUE self)
|
133
139
|
{
|
134
140
|
BitVector *bv;
|
135
141
|
GET_BV(bv, self);
|
@@ -144,7 +150,7 @@ frt_bv_count(VALUE self)
|
|
144
150
|
* all bits set to *off*.
|
145
151
|
*/
|
146
152
|
VALUE
|
147
|
-
|
153
|
+
frb_bv_clear(VALUE self)
|
148
154
|
{
|
149
155
|
BitVector *bv;
|
150
156
|
GET_BV(bv, self);
|
@@ -163,7 +169,7 @@ frt_bv_clear(VALUE self)
|
|
163
169
|
* bits set.
|
164
170
|
*/
|
165
171
|
VALUE
|
166
|
-
|
172
|
+
frb_bv_eql(VALUE self, VALUE other)
|
167
173
|
{
|
168
174
|
BitVector *bv1, *bv2;
|
169
175
|
GET_BV(bv1, self);
|
@@ -179,7 +185,7 @@ frt_bv_eql(VALUE self, VALUE other)
|
|
179
185
|
* cache them.
|
180
186
|
*/
|
181
187
|
VALUE
|
182
|
-
|
188
|
+
frb_bv_hash(VALUE self)
|
183
189
|
{
|
184
190
|
BitVector *bv;
|
185
191
|
GET_BV(bv, self);
|
@@ -195,7 +201,7 @@ frt_bv_hash(VALUE self)
|
|
195
201
|
* +bv2+
|
196
202
|
*/
|
197
203
|
VALUE
|
198
|
-
|
204
|
+
frb_bv_and(VALUE self, VALUE other)
|
199
205
|
{
|
200
206
|
BitVector *bv1, *bv2;
|
201
207
|
GET_BV(bv1, self);
|
@@ -211,7 +217,7 @@ frt_bv_and(VALUE self, VALUE other)
|
|
211
217
|
* +bv2+ in place on +bv1+
|
212
218
|
*/
|
213
219
|
VALUE
|
214
|
-
|
220
|
+
frb_bv_and_x(VALUE self, VALUE other)
|
215
221
|
{
|
216
222
|
BitVector *bv1, *bv2;
|
217
223
|
GET_BV(bv1, self);
|
@@ -229,7 +235,7 @@ frt_bv_and_x(VALUE self, VALUE other)
|
|
229
235
|
* +bv2+
|
230
236
|
*/
|
231
237
|
VALUE
|
232
|
-
|
238
|
+
frb_bv_or(VALUE self, VALUE other)
|
233
239
|
{
|
234
240
|
BitVector *bv1, *bv2;
|
235
241
|
GET_BV(bv1, self);
|
@@ -245,7 +251,7 @@ frt_bv_or(VALUE self, VALUE other)
|
|
245
251
|
* +bv2+ in place on +bv1+
|
246
252
|
*/
|
247
253
|
VALUE
|
248
|
-
|
254
|
+
frb_bv_or_x(VALUE self, VALUE other)
|
249
255
|
{
|
250
256
|
BitVector *bv1, *bv2;
|
251
257
|
GET_BV(bv1, self);
|
@@ -263,7 +269,7 @@ frt_bv_or_x(VALUE self, VALUE other)
|
|
263
269
|
* +bv2+
|
264
270
|
*/
|
265
271
|
VALUE
|
266
|
-
|
272
|
+
frb_bv_xor(VALUE self, VALUE other)
|
267
273
|
{
|
268
274
|
BitVector *bv1, *bv2;
|
269
275
|
GET_BV(bv1, self);
|
@@ -279,7 +285,7 @@ frt_bv_xor(VALUE self, VALUE other)
|
|
279
285
|
* +bv2+ in place on +bv1+
|
280
286
|
*/
|
281
287
|
VALUE
|
282
|
-
|
288
|
+
frb_bv_xor_x(VALUE self, VALUE other)
|
283
289
|
{
|
284
290
|
BitVector *bv1, *bv2;
|
285
291
|
GET_BV(bv1, self);
|
@@ -296,7 +302,7 @@ frt_bv_xor_x(VALUE self, VALUE other)
|
|
296
302
|
* Perform a boolean _not_ operation on +bv+
|
297
303
|
* */
|
298
304
|
VALUE
|
299
|
-
|
305
|
+
frb_bv_not(VALUE self)
|
300
306
|
{
|
301
307
|
BitVector *bv;
|
302
308
|
GET_BV(bv, self);
|
@@ -310,7 +316,7 @@ frt_bv_not(VALUE self)
|
|
310
316
|
* Perform a boolean _not_ operation on +bv+ in-place
|
311
317
|
*/
|
312
318
|
VALUE
|
313
|
-
|
319
|
+
frb_bv_not_x(VALUE self)
|
314
320
|
{
|
315
321
|
BitVector *bv;
|
316
322
|
GET_BV(bv, self);
|
@@ -327,7 +333,7 @@ frt_bv_not_x(VALUE self)
|
|
327
333
|
* necessary for the other scan methods or for the +#each+ method.
|
328
334
|
*/
|
329
335
|
VALUE
|
330
|
-
|
336
|
+
frb_bv_reset_scan(VALUE self)
|
331
337
|
{
|
332
338
|
BitVector *bv;
|
333
339
|
GET_BV(bv, self);
|
@@ -345,7 +351,7 @@ frt_bv_reset_scan(VALUE self)
|
|
345
351
|
* first create the bit vector.
|
346
352
|
*/
|
347
353
|
VALUE
|
348
|
-
|
354
|
+
frb_bv_next(VALUE self)
|
349
355
|
{
|
350
356
|
BitVector *bv;
|
351
357
|
GET_BV(bv, self);
|
@@ -363,7 +369,7 @@ frt_bv_next(VALUE self)
|
|
363
369
|
* automatically reset when you first create the bit vector.
|
364
370
|
*/
|
365
371
|
VALUE
|
366
|
-
|
372
|
+
frb_bv_next_unset(VALUE self)
|
367
373
|
{
|
368
374
|
BitVector *bv;
|
369
375
|
GET_BV(bv, self);
|
@@ -381,7 +387,7 @@ frt_bv_next_unset(VALUE self)
|
|
381
387
|
* use the +#next_unset_from+ method.
|
382
388
|
*/
|
383
389
|
VALUE
|
384
|
-
|
390
|
+
frb_bv_next_from(VALUE self, VALUE rfrom)
|
385
391
|
{
|
386
392
|
BitVector *bv;
|
387
393
|
int from = FIX2INT(rfrom);
|
@@ -403,7 +409,7 @@ frt_bv_next_from(VALUE self, VALUE rfrom)
|
|
403
409
|
* should use the +#next_from+ method.
|
404
410
|
*/
|
405
411
|
VALUE
|
406
|
-
|
412
|
+
frb_bv_next_unset_from(VALUE self, VALUE rfrom)
|
407
413
|
{
|
408
414
|
BitVector *bv;
|
409
415
|
int from = FIX2INT(rfrom);
|
@@ -422,7 +428,7 @@ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
|
|
422
428
|
* order
|
423
429
|
*/
|
424
430
|
VALUE
|
425
|
-
|
431
|
+
frb_bv_each(VALUE self)
|
426
432
|
{
|
427
433
|
BitVector *bv;
|
428
434
|
int bit;
|
@@ -453,7 +459,7 @@ frt_bv_each(VALUE self)
|
|
453
459
|
* bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
|
454
460
|
*/
|
455
461
|
VALUE
|
456
|
-
|
462
|
+
frb_bv_to_a(VALUE self)
|
457
463
|
{
|
458
464
|
BitVector *bv;
|
459
465
|
int bit;
|
@@ -527,38 +533,198 @@ Init_BitVector(void)
|
|
527
533
|
{
|
528
534
|
/* BitVector */
|
529
535
|
cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
|
530
|
-
rb_define_alloc_func(cBitVector,
|
531
|
-
|
532
|
-
rb_define_method(cBitVector, "initialize",
|
533
|
-
rb_define_method(cBitVector, "set",
|
534
|
-
rb_define_method(cBitVector, "unset",
|
535
|
-
rb_define_method(cBitVector, "[]=",
|
536
|
-
rb_define_method(cBitVector, "get",
|
537
|
-
rb_define_method(cBitVector, "[]",
|
538
|
-
rb_define_method(cBitVector, "count",
|
539
|
-
rb_define_method(cBitVector, "clear",
|
540
|
-
rb_define_method(cBitVector, "eql?",
|
541
|
-
rb_define_method(cBitVector, "==",
|
542
|
-
rb_define_method(cBitVector, "hash",
|
543
|
-
rb_define_method(cBitVector, "and!",
|
544
|
-
rb_define_method(cBitVector, "and",
|
545
|
-
rb_define_method(cBitVector, "&",
|
546
|
-
rb_define_method(cBitVector, "or!",
|
547
|
-
rb_define_method(cBitVector, "or",
|
548
|
-
rb_define_method(cBitVector, "|",
|
549
|
-
rb_define_method(cBitVector, "xor!",
|
550
|
-
rb_define_method(cBitVector, "xor",
|
551
|
-
rb_define_method(cBitVector, "^",
|
552
|
-
rb_define_method(cBitVector, "not!",
|
553
|
-
rb_define_method(cBitVector, "not",
|
554
|
-
rb_define_method(cBitVector, "~",
|
555
|
-
rb_define_method(cBitVector, "reset_scan",
|
556
|
-
rb_define_method(cBitVector, "next",
|
557
|
-
rb_define_method(cBitVector, "next_unset",
|
558
|
-
rb_define_method(cBitVector, "next_from",
|
559
|
-
rb_define_method(cBitVector, "next_unset_from",
|
560
|
-
rb_define_method(cBitVector, "each",
|
561
|
-
rb_define_method(cBitVector, "to_a",
|
536
|
+
rb_define_alloc_func(cBitVector, frb_bv_alloc);
|
537
|
+
|
538
|
+
rb_define_method(cBitVector, "initialize", frb_bv_init, 0);
|
539
|
+
rb_define_method(cBitVector, "set", frb_bv_set_on, 1);
|
540
|
+
rb_define_method(cBitVector, "unset", frb_bv_set_off, 1);
|
541
|
+
rb_define_method(cBitVector, "[]=", frb_bv_set, 2);
|
542
|
+
rb_define_method(cBitVector, "get", frb_bv_get, 1);
|
543
|
+
rb_define_method(cBitVector, "[]", frb_bv_get, 1);
|
544
|
+
rb_define_method(cBitVector, "count", frb_bv_count, 0);
|
545
|
+
rb_define_method(cBitVector, "clear", frb_bv_clear, 0);
|
546
|
+
rb_define_method(cBitVector, "eql?", frb_bv_eql, 1);
|
547
|
+
rb_define_method(cBitVector, "==", frb_bv_eql, 1);
|
548
|
+
rb_define_method(cBitVector, "hash", frb_bv_hash, 0);
|
549
|
+
rb_define_method(cBitVector, "and!", frb_bv_and_x, 1);
|
550
|
+
rb_define_method(cBitVector, "and", frb_bv_and, 1);
|
551
|
+
rb_define_method(cBitVector, "&", frb_bv_and, 1);
|
552
|
+
rb_define_method(cBitVector, "or!", frb_bv_or_x, 1);
|
553
|
+
rb_define_method(cBitVector, "or", frb_bv_or, 1);
|
554
|
+
rb_define_method(cBitVector, "|", frb_bv_or, 1);
|
555
|
+
rb_define_method(cBitVector, "xor!", frb_bv_xor_x, 1);
|
556
|
+
rb_define_method(cBitVector, "xor", frb_bv_xor, 1);
|
557
|
+
rb_define_method(cBitVector, "^", frb_bv_xor, 1);
|
558
|
+
rb_define_method(cBitVector, "not!", frb_bv_not_x, 0);
|
559
|
+
rb_define_method(cBitVector, "not", frb_bv_not, 0);
|
560
|
+
rb_define_method(cBitVector, "~", frb_bv_not, 0);
|
561
|
+
rb_define_method(cBitVector, "reset_scan", frb_bv_reset_scan, 0);
|
562
|
+
rb_define_method(cBitVector, "next", frb_bv_next, 0);
|
563
|
+
rb_define_method(cBitVector, "next_unset", frb_bv_next_unset, 0);
|
564
|
+
rb_define_method(cBitVector, "next_from", frb_bv_next_from, 1);
|
565
|
+
rb_define_method(cBitVector, "next_unset_from", frb_bv_next_unset_from, 1);
|
566
|
+
rb_define_method(cBitVector, "each", frb_bv_each, 0);
|
567
|
+
rb_define_method(cBitVector, "to_a", frb_bv_to_a, 0);
|
568
|
+
}
|
569
|
+
|
570
|
+
/*******************
|
571
|
+
*** MultiMapper ***
|
572
|
+
*******************/
|
573
|
+
static VALUE cMultiMapper;
|
574
|
+
|
575
|
+
static void
|
576
|
+
frb_mulmap_free(void *p)
|
577
|
+
{
|
578
|
+
object_del(p);
|
579
|
+
mulmap_destroy((MultiMapper *)p);
|
580
|
+
}
|
581
|
+
|
582
|
+
static VALUE
|
583
|
+
frb_mulmap_alloc(VALUE klass)
|
584
|
+
{
|
585
|
+
MultiMapper *mulmap = mulmap_new();
|
586
|
+
VALUE rmulmap = Data_Wrap_Struct(klass, NULL, &frb_mulmap_free, mulmap);
|
587
|
+
object_add(mulmap, rmulmap);
|
588
|
+
return rmulmap;
|
589
|
+
}
|
590
|
+
|
591
|
+
/* XXX: Duplication from frb_add_mapping_i in r_analysis.c */
|
592
|
+
static INLINE void frb_mulmap_add_mapping_i(MultiMapper *mulmap, VALUE from,
|
593
|
+
const char *to)
|
594
|
+
{
|
595
|
+
switch (TYPE(from)) {
|
596
|
+
case T_STRING:
|
597
|
+
mulmap_add_mapping(mulmap, rs2s(from), to);
|
598
|
+
break;
|
599
|
+
case T_SYMBOL:
|
600
|
+
mulmap_add_mapping(mulmap, rb_id2name(SYM2ID(from)), to);
|
601
|
+
break;
|
602
|
+
default:
|
603
|
+
rb_raise(rb_eArgError,
|
604
|
+
"cannot map from %s with MappingFilter",
|
605
|
+
rs2s(rb_obj_as_string(from)));
|
606
|
+
break;
|
607
|
+
}
|
608
|
+
}
|
609
|
+
|
610
|
+
/* XXX: Duplication from frb_add_mappings_i in r_analysis.c */
|
611
|
+
static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg)
|
612
|
+
{
|
613
|
+
if (key == Qundef) {
|
614
|
+
return ST_CONTINUE;
|
615
|
+
} else {
|
616
|
+
MultiMapper *mulmap = (MultiMapper *)arg;
|
617
|
+
const char *to;
|
618
|
+
switch (TYPE(value)) {
|
619
|
+
case T_STRING:
|
620
|
+
to = rs2s(value);
|
621
|
+
break;
|
622
|
+
case T_SYMBOL:
|
623
|
+
to = rb_id2name(SYM2ID(value));
|
624
|
+
break;
|
625
|
+
default:
|
626
|
+
rb_raise(rb_eArgError,
|
627
|
+
"cannot map to %s with MultiMapper",
|
628
|
+
rs2s(rb_obj_as_string(key)));
|
629
|
+
break;
|
630
|
+
}
|
631
|
+
if (TYPE(key) == T_ARRAY) {
|
632
|
+
int i;
|
633
|
+
for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
|
634
|
+
frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
|
635
|
+
}
|
636
|
+
}
|
637
|
+
else {
|
638
|
+
frb_mulmap_add_mapping_i(mulmap, key, to);
|
639
|
+
}
|
640
|
+
}
|
641
|
+
return ST_CONTINUE;
|
642
|
+
}
|
643
|
+
|
644
|
+
/*
|
645
|
+
* call-seq:
|
646
|
+
* MultiMapper.new(mappings) -> new_multi_mapper
|
647
|
+
*
|
648
|
+
* Returns a new multi-mapper object and compiles it for optimization.
|
649
|
+
*
|
650
|
+
* Note that MultiMapper is immutable.
|
651
|
+
*/
|
652
|
+
static VALUE
|
653
|
+
frb_mulmap_init(VALUE self, VALUE rmappings)
|
654
|
+
{
|
655
|
+
MultiMapper *mulmap = DATA_PTR(self);
|
656
|
+
rb_hash_foreach(rmappings, frb_mulmap_add_mappings_i, (VALUE)mulmap);
|
657
|
+
mulmap_compile(mulmap);
|
658
|
+
|
659
|
+
return self;
|
660
|
+
}
|
661
|
+
|
662
|
+
/*
|
663
|
+
* call-seq:
|
664
|
+
* multi_mapper.map(string) -> mapped_string
|
665
|
+
*
|
666
|
+
* Performs all the mappings on the string.
|
667
|
+
*/
|
668
|
+
VALUE
|
669
|
+
frb_mulmap_map(VALUE self, VALUE rstring)
|
670
|
+
{
|
671
|
+
MultiMapper *mulmap = DATA_PTR(self);
|
672
|
+
char *string = rs2s(rb_obj_as_string(rstring));
|
673
|
+
char *mapped_string = mulmap_dynamic_map(mulmap, string);
|
674
|
+
VALUE rmapped_string = rb_str_new2(mapped_string);
|
675
|
+
free(mapped_string);
|
676
|
+
return rmapped_string;
|
677
|
+
}
|
678
|
+
|
679
|
+
/*
 *  Document-class: Ferret::Utils::MultiMapper
 *
 *  == Summary
 *
 *  A MultiMapper performs a list of mappings from one string to another. You
 *  could of course just use gsub to do this but when you are just mapping
 *  strings, this is much faster.
 *
 *  Note that MultiMapper is immutable.
 *
 *  == Example
 *
 *     mapping = {
 *       ['à','á','â','ã','ä','å','ā','ă']         => 'a',
 *       'æ'                                       => 'ae',
 *       ['ď','đ']                                 => 'd',
 *       ['ç','ć','č','ĉ','ċ']                     => 'c',
 *       ['è','é','ê','ë','ē','ę','ě','ĕ','ė',]    => 'e',
 *       ['ƒ']                                     => 'f',
 *       ['ĝ','ğ','ġ','ģ']                         => 'g',
 *       ['ĥ','ħ']                                 => 'h',
 *       ['ì','ì','í','î','ï','ī','ĩ','ĭ']         => 'i',
 *       ['į','ı','ĳ','ĵ']                         => 'j',
 *       ['ķ','ĸ']                                 => 'k',
 *       ['ł','ľ','ĺ','ļ','ŀ']                     => 'l',
 *       ['ñ','ń','ň','ņ','ŉ','ŋ']                 => 'n',
 *       ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
 *       ['œ']                                     => 'oe',
 *       ['ą']                                     => 'q',
 *       ['ŕ','ř','ŗ']                             => 'r',
 *       ['ś','š','ş','ŝ','ș']                     => 's',
 *       ['ť','ţ','ŧ','ț']                         => 't',
 *       ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
 *       ['ŵ']                                     => 'w',
 *       ['ý','ÿ','ŷ']                             => 'y',
 *       ['ž','ż','ź']                             => 'z'
 *     }
 *     mapper = MultiMapper.new(mapping)
 *     mapped_string = mapper.map(string)
 */
|
719
|
+
static void
|
720
|
+
Init_MultiMapper(void)
|
721
|
+
{
|
722
|
+
/* MultiMapper */
|
723
|
+
cMultiMapper = rb_define_class_under(mUtils, "MultiMapper", rb_cObject);
|
724
|
+
rb_define_alloc_func(cMultiMapper, frb_mulmap_alloc);
|
725
|
+
|
726
|
+
rb_define_method(cMultiMapper, "initialize", frb_mulmap_init, 1);
|
727
|
+
rb_define_method(cMultiMapper, "map", frb_mulmap_map, 1);
|
562
728
|
}
|
563
729
|
|
564
730
|
/*********************
|
@@ -575,7 +741,7 @@ typedef struct PriQ
|
|
575
741
|
|
576
742
|
#define PQ_START_CAPA 32
|
577
743
|
|
578
|
-
static bool
|
744
|
+
static bool frb_pq_lt(VALUE proc, VALUE v1, VALUE v2)
|
579
745
|
{
|
580
746
|
if (proc == Qnil) {
|
581
747
|
return RTEST(rb_funcall(v1, id_lt, 1, v2));
|
@@ -594,7 +760,7 @@ static void pq_up(PriQ *pq)
|
|
594
760
|
|
595
761
|
node = heap[i];
|
596
762
|
|
597
|
-
while ((j > 0) &&
|
763
|
+
while ((j > 0) && frb_pq_lt(pq->proc, node, heap[j])) {
|
598
764
|
heap[i] = heap[j];
|
599
765
|
i = j;
|
600
766
|
j = j >> 1;
|
@@ -611,16 +777,16 @@ static void pq_down(PriQ *pq)
|
|
611
777
|
VALUE *heap = pq->heap;
|
612
778
|
VALUE node = heap[i]; /* save top node */
|
613
779
|
|
614
|
-
if ((k <= size) && (
|
780
|
+
if ((k <= size) && (frb_pq_lt(pq->proc, heap[k], heap[j]))) {
|
615
781
|
j = k;
|
616
782
|
}
|
617
783
|
|
618
|
-
while ((j <= size) &&
|
784
|
+
while ((j <= size) && frb_pq_lt(pq->proc, heap[j], node)) {
|
619
785
|
heap[i] = heap[j]; /* shift up child */
|
620
786
|
i = j;
|
621
787
|
j = i << 1;
|
622
788
|
k = j + 1;
|
623
|
-
if ((k <= size) &&
|
789
|
+
if ((k <= size) && frb_pq_lt(pq->proc, heap[k], heap[j])) {
|
624
790
|
j = k;
|
625
791
|
}
|
626
792
|
}
|
@@ -641,7 +807,7 @@ static void pq_push(PriQ *pq, VALUE elem)
|
|
641
807
|
static VALUE cPriorityQueue;
|
642
808
|
|
643
809
|
static void
|
644
|
-
|
810
|
+
frb_pq_mark(void *p)
|
645
811
|
{
|
646
812
|
PriQ *pq = (PriQ *)p;
|
647
813
|
int i;
|
@@ -650,21 +816,21 @@ frt_pq_mark(void *p)
|
|
650
816
|
}
|
651
817
|
}
|
652
818
|
|
653
|
-
static void
|
819
|
+
/*
 * Free function handed to Data_Wrap_Struct for PriorityQueue objects:
 * releases the heap array first and then the PriQ struct that owns it.
 */
static void frb_pq_free(PriQ *pq)
{
    VALUE *heap = pq->heap;

    free(heap);
    free(pq);
}
|
658
824
|
|
659
825
|
static VALUE
|
660
|
-
|
826
|
+
frb_pq_alloc(VALUE klass)
|
661
827
|
{
|
662
828
|
PriQ *pq = ALLOC_AND_ZERO(PriQ);
|
663
829
|
pq->capa = PQ_START_CAPA;
|
664
830
|
pq->mem_capa = PQ_START_CAPA;
|
665
831
|
pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
|
666
832
|
pq->proc = Qnil;
|
667
|
-
return Data_Wrap_Struct(klass, &
|
833
|
+
return Data_Wrap_Struct(klass, &frb_pq_mark, &frb_pq_free, pq);
|
668
834
|
}
|
669
835
|
|
670
836
|
#define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
|
@@ -681,7 +847,7 @@ frt_pq_alloc(VALUE klass)
|
|
681
847
|
* inserted into the queue.
|
682
848
|
*/
|
683
849
|
static VALUE
|
684
|
-
|
850
|
+
frb_pq_init(int argc, VALUE *argv, VALUE self)
|
685
851
|
{
|
686
852
|
if (argc >= 1) {
|
687
853
|
PriQ *pq;
|
@@ -736,7 +902,7 @@ frt_pq_init(int argc, VALUE *argv, VALUE self)
|
|
736
902
|
* queue is cloned, its contents are not cloned.
|
737
903
|
*/
|
738
904
|
static VALUE
|
739
|
-
|
905
|
+
frb_pq_clone(VALUE self)
|
740
906
|
{
|
741
907
|
PriQ *pq, *new_pq = ALLOC(PriQ);
|
742
908
|
GET_PQ(pq, self);
|
@@ -744,7 +910,7 @@ frt_pq_clone(VALUE self)
|
|
744
910
|
new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
|
745
911
|
memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
|
746
912
|
|
747
|
-
return Data_Wrap_Struct(cPriorityQueue, &
|
913
|
+
return Data_Wrap_Struct(cPriorityQueue, &frb_pq_mark, &frb_pq_free, new_pq);
|
748
914
|
}
|
749
915
|
|
750
916
|
/*
|
@@ -754,7 +920,7 @@ frt_pq_clone(VALUE self)
|
|
754
920
|
* Clears all elements from the priority queue. The size will be reset to 0.
|
755
921
|
*/
|
756
922
|
static VALUE
|
757
|
-
|
923
|
+
frb_pq_clear(VALUE self)
|
758
924
|
{
|
759
925
|
PriQ *pq;
|
760
926
|
GET_PQ(pq, self);
|
@@ -771,14 +937,14 @@ frt_pq_clear(VALUE self)
|
|
771
937
|
* position in the queue according to its priority.
|
772
938
|
*/
|
773
939
|
static VALUE
|
774
|
-
|
940
|
+
frb_pq_insert(VALUE self, VALUE elem)
|
775
941
|
{
|
776
942
|
PriQ *pq;
|
777
943
|
GET_PQ(pq, self);
|
778
944
|
if (pq->size < pq->capa) {
|
779
945
|
pq_push(pq, elem);
|
780
946
|
}
|
781
|
-
else if (pq->size > 0 &&
|
947
|
+
else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
|
782
948
|
pq->heap[1] = elem;
|
783
949
|
pq_down(pq);
|
784
950
|
}
|
@@ -795,7 +961,7 @@ frt_pq_insert(VALUE self, VALUE elem)
|
|
795
961
|
* do this by calling the adjust method.
|
796
962
|
*/
|
797
963
|
static VALUE
|
798
|
-
|
964
|
+
frb_pq_adjust(VALUE self)
|
799
965
|
{
|
800
966
|
PriQ *pq;
|
801
967
|
GET_PQ(pq, self);
|
@@ -811,7 +977,7 @@ frt_pq_adjust(VALUE self)
|
|
811
977
|
* queue.
|
812
978
|
*/
|
813
979
|
static VALUE
|
814
|
-
|
980
|
+
frb_pq_top(VALUE self)
|
815
981
|
{
|
816
982
|
PriQ *pq;
|
817
983
|
GET_PQ(pq, self);
|
@@ -825,7 +991,7 @@ frt_pq_top(VALUE self)
|
|
825
991
|
* Returns the top element in the queue removing it from the queue.
|
826
992
|
*/
|
827
993
|
static VALUE
|
828
|
-
|
994
|
+
frb_pq_pop(VALUE self)
|
829
995
|
{
|
830
996
|
PriQ *pq;
|
831
997
|
GET_PQ(pq, self);
|
@@ -851,7 +1017,7 @@ frt_pq_pop(VALUE self)
|
|
851
1017
|
* its _capacity_
|
852
1018
|
*/
|
853
1019
|
static VALUE
|
854
|
-
|
1020
|
+
frb_pq_size(VALUE self)
|
855
1021
|
{
|
856
1022
|
PriQ *pq;
|
857
1023
|
GET_PQ(pq, self);
|
@@ -868,7 +1034,7 @@ frt_pq_size(VALUE self)
|
|
868
1034
|
* _capacity_
|
869
1035
|
*/
|
870
1036
|
static VALUE
|
871
|
-
|
1037
|
+
frb_pq_capa(VALUE self)
|
872
1038
|
{
|
873
1039
|
PriQ *pq;
|
874
1040
|
GET_PQ(pq, self);
|
@@ -922,18 +1088,18 @@ Init_PriorityQueue(void)
|
|
922
1088
|
{
|
923
1089
|
/* PriorityQueue */
|
924
1090
|
cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
|
925
|
-
rb_define_alloc_func(cPriorityQueue,
|
926
|
-
|
927
|
-
rb_define_method(cPriorityQueue, "initialize",
|
928
|
-
rb_define_method(cPriorityQueue, "clone",
|
929
|
-
rb_define_method(cPriorityQueue, "clear",
|
930
|
-
rb_define_method(cPriorityQueue, "insert",
|
931
|
-
rb_define_method(cPriorityQueue, "<<",
|
932
|
-
rb_define_method(cPriorityQueue, "top",
|
933
|
-
rb_define_method(cPriorityQueue, "pop",
|
934
|
-
rb_define_method(cPriorityQueue, "size",
|
935
|
-
rb_define_method(cPriorityQueue, "capacity",
|
936
|
-
rb_define_method(cPriorityQueue, "adjust",
|
1091
|
+
rb_define_alloc_func(cPriorityQueue, frb_pq_alloc);
|
1092
|
+
|
1093
|
+
rb_define_method(cPriorityQueue, "initialize", frb_pq_init, -1);
|
1094
|
+
rb_define_method(cPriorityQueue, "clone", frb_pq_clone, 0);
|
1095
|
+
rb_define_method(cPriorityQueue, "clear", frb_pq_clear, 0);
|
1096
|
+
rb_define_method(cPriorityQueue, "insert", frb_pq_insert, 1);
|
1097
|
+
rb_define_method(cPriorityQueue, "<<", frb_pq_insert, 1);
|
1098
|
+
rb_define_method(cPriorityQueue, "top", frb_pq_top, 0);
|
1099
|
+
rb_define_method(cPriorityQueue, "pop", frb_pq_pop, 0);
|
1100
|
+
rb_define_method(cPriorityQueue, "size", frb_pq_size, 0);
|
1101
|
+
rb_define_method(cPriorityQueue, "capacity", frb_pq_capa, 0);
|
1102
|
+
rb_define_method(cPriorityQueue, "adjust", frb_pq_adjust, 0);
|
937
1103
|
}
|
938
1104
|
|
939
1105
|
/* rdoc hack
|
@@ -947,6 +1113,7 @@ extern VALUE mFerret = rb_define_module("Ferret");
|
|
947
1113
|
* useful when indexing with Ferret. They are:
|
948
1114
|
*
|
949
1115
|
* * BitVector
|
1116
|
+
* * MultiMapper
|
950
1117
|
* * PriorityQueue
|
951
1118
|
* * => more to come
|
952
1119
|
*
|
@@ -959,5 +1126,6 @@ Init_Utils(void)
|
|
959
1126
|
mUtils = rb_define_module_under(mFerret, "Utils");
|
960
1127
|
|
961
1128
|
Init_BitVector();
|
1129
|
+
Init_MultiMapper();
|
962
1130
|
Init_PriorityQueue();
|
963
1131
|
}
|