ferret 0.10.14 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +3 -0
- data/ext/analysis.c +5 -0
- data/ext/compound_io.c +46 -24
- data/ext/except.c +14 -0
- data/ext/except.h +29 -17
- data/ext/ferret.c +22 -1
- data/ext/ferret.h +2 -1
- data/ext/fs_store.c +9 -12
- data/ext/global.c +80 -0
- data/ext/global.h +10 -0
- data/ext/hash.c +0 -7
- data/ext/hash.h +0 -8
- data/ext/index.c +1289 -625
- data/ext/index.h +59 -14
- data/ext/q_boolean.c +12 -5
- data/ext/q_parser.c +570 -372
- data/ext/r_analysis.c +16 -16
- data/ext/r_index.c +41 -43
- data/ext/r_qparser.c +37 -36
- data/ext/r_search.c +10 -10
- data/ext/r_store.c +7 -7
- data/ext/ram_store.c +4 -3
- data/ext/search.c +3 -2
- data/ext/store.c +35 -19
- data/ext/store.h +3 -5
- data/lib/ferret/index.rb +4 -4
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +17 -21
- data/test/unit/index/tc_index.rb +6 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- data/test/unit/query_parser/tc_query_parser.rb +20 -5
- data/test/unit/search/tc_index_searcher.rb +3 -1
- data/test/unit/search/tm_searcher.rb +3 -1
- metadata +3 -2
data/TODO
CHANGED
@@ -5,6 +5,9 @@
|
|
5
5
|
* Fix highlighting to work for compressed fields
|
6
6
|
* Fix highlighting to work for external fields
|
7
7
|
* Add Ferret::Index::Index
|
8
|
+
* Fix:
|
9
|
+
> Working Query: field1:value1 AND NOT field2:value2
|
10
|
+
> Failing Query: field1:value1 AND ( NOT field2:value2 )
|
8
11
|
|
9
12
|
= Done
|
10
13
|
* Add string Sort descriptor
|
data/ext/analysis.c
CHANGED
@@ -876,6 +876,11 @@ static Token *std_next(TokenStream *ts)
|
|
876
876
|
tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
|
877
877
|
CTS(ts)->token.end += 2;
|
878
878
|
}
|
879
|
+
else if (t[-1] == '\'') {
|
880
|
+
t -= 1;
|
881
|
+
tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
|
882
|
+
CTS(ts)->token.end += 1;
|
883
|
+
}
|
879
884
|
else {
|
880
885
|
tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
|
881
886
|
}
|
data/ext/compound_io.c
CHANGED
@@ -58,6 +58,20 @@ static int cmpd_count(Store *store)
|
|
58
58
|
return store->dir.cmpd->entries->size;
|
59
59
|
}
|
60
60
|
|
61
|
+
static void cmpd_each(Store *store,
|
62
|
+
void (*func)(char *fname, void *arg), void *arg)
|
63
|
+
{
|
64
|
+
HashTable *ht = store->dir.cmpd->entries;
|
65
|
+
int i;
|
66
|
+
for (i = 0; i <= ht->mask; i++) {
|
67
|
+
char *fn = (char *)ht->table[i].key;
|
68
|
+
if (fn) {
|
69
|
+
func(fn, arg);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
|
61
75
|
/**
|
62
76
|
* @throws UNSUPPORTED_ERROR
|
63
77
|
*/
|
@@ -200,35 +214,42 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
200
214
|
int count, i;
|
201
215
|
off_t offset;
|
202
216
|
char *fname;
|
203
|
-
FileEntry *entry;
|
217
|
+
FileEntry *entry = NULL;
|
204
218
|
Store *new_store = NULL;
|
205
219
|
CompoundStore *cmpd = NULL;
|
206
220
|
InStream *is = NULL;
|
207
221
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
222
|
+
TRY
|
223
|
+
new_store = store_new();
|
224
|
+
cmpd = ALLOC_AND_ZERO(CompoundStore);
|
225
|
+
|
226
|
+
cmpd->store = store;
|
227
|
+
cmpd->name = name;
|
228
|
+
cmpd->entries = h_new_str(&free, &free);
|
229
|
+
is = cmpd->stream = store->open_input(store, cmpd->name);
|
230
|
+
|
231
|
+
/* read the directory and init files */
|
232
|
+
count = is_read_vint(is);
|
233
|
+
entry = NULL;
|
234
|
+
for (i = 0; i < count; i++) {
|
235
|
+
offset = (off_t)is_read_i64(is);
|
236
|
+
fname = is_read_string(is);
|
237
|
+
|
238
|
+
if (entry != NULL) {
|
239
|
+
/* set length of the previous entry */
|
240
|
+
entry->length = offset - entry->offset;
|
241
|
+
}
|
242
|
+
|
243
|
+
entry = ALLOC(FileEntry);
|
244
|
+
entry->offset = offset;
|
245
|
+
h_set(cmpd->entries, fname, entry);
|
226
246
|
}
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
247
|
+
XCATCHALL
|
248
|
+
store_destroy(new_store);
|
249
|
+
if (is) is_close(is);
|
250
|
+
if (cmpd->entries) h_destroy(cmpd->entries);
|
251
|
+
free(cmpd);
|
252
|
+
XENDTRY
|
232
253
|
|
233
254
|
/* set the length of the final entry */
|
234
255
|
if (entry != NULL) {
|
@@ -243,6 +264,7 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
243
264
|
new_store->count = &cmpd_count;
|
244
265
|
new_store->clear = &cmpd_clear;
|
245
266
|
new_store->length = &cmpd_length;
|
267
|
+
new_store->each = &cmpd_each;
|
246
268
|
new_store->close_i = &cmpd_close_i;
|
247
269
|
new_store->new_output = &cmpd_new_output;
|
248
270
|
new_store->open_input = &cmpd_open_input;
|
data/ext/except.c
CHANGED
@@ -14,6 +14,7 @@ const char *const FRT_ERROR_TYPES[] = {
|
|
14
14
|
"Finally",
|
15
15
|
"Exception",
|
16
16
|
"IO Error",
|
17
|
+
"File Not Found Error",
|
17
18
|
"Argument Error",
|
18
19
|
"End-of-File Error",
|
19
20
|
"Unsupported Function Error",
|
@@ -37,6 +38,19 @@ static void exception_stack_alloc(void)
|
|
37
38
|
thread_key_create(&exception_stack_key, NULL);
|
38
39
|
}
|
39
40
|
|
41
|
+
/*
|
42
|
+
static void put_stack(char *inst, xcontext_t *top)
|
43
|
+
{
|
44
|
+
int x = 10;
|
45
|
+
fprintf(stderr, "( %s )", inst);
|
46
|
+
while (top && x--) {
|
47
|
+
fprintf(stderr, "=> %x ", top);
|
48
|
+
top = top->next;
|
49
|
+
}
|
50
|
+
fprintf(stderr, "\n");
|
51
|
+
}
|
52
|
+
*/
|
53
|
+
|
40
54
|
void xpush_context(xcontext_t *context)
|
41
55
|
{
|
42
56
|
xcontext_t *top_context;
|
data/ext/except.h
CHANGED
@@ -25,7 +25,7 @@
|
|
25
25
|
* if (close_widget_two(arg) == 0) {
|
26
26
|
* RAISE(EXCEPTION_CODE, msg);
|
27
27
|
* }
|
28
|
-
*
|
28
|
+
* XENDTRY
|
29
29
|
* </pre>
|
30
30
|
*
|
31
31
|
* Basically exception handling uses the following macros;
|
@@ -48,17 +48,25 @@
|
|
48
48
|
* Code in this block is always called. Use this block to close any
|
49
49
|
* resources opened in the Exception handling body.
|
50
50
|
*
|
51
|
-
* XFINALLY
|
52
|
-
* Similar to case FINALLY: except that any exceptions thrown in this block
|
53
|
-
* are ignored until the end of the block is reached at which time the first
|
54
|
-
* exception which was raise will be re-raised. This is useful for closing a
|
55
|
-
* number of resources that all might raise exceptions so as much as
|
56
|
-
* possible will be successfully closed.
|
57
|
-
*
|
58
51
|
* ENDTRY
|
59
52
|
* Must be placed at the end of all exception handling code.
|
53
|
+
*
|
54
|
+
* XFINALLY
|
55
|
+
* Similar to case FINALLY: except that it uses a fall through (ie, you must
|
56
|
+
* not use a break before it) instead of a jump to get to it. This saves a
|
57
|
+
* jump. It must be used in combination with XENDTRY and must not have any
|
58
|
+
* other catches. This is an optimization so should probably not be used
|
59
|
+
* in most cases.
|
60
|
+
*
|
61
|
+
* XCATCHALL
|
62
|
+
* Like XFINALLY but the block is only called when an exception is raised.
|
63
|
+
* Must be used in combination with XENDTRY and must not have any other FINALLY or
|
64
|
+
* catch block.
|
65
|
+
*
|
66
|
+
* XENDTRY
|
67
|
+
* Must be used in combination with XFINALLY or XCATCHALL. Simply, it doesn't
|
68
|
+
* jump to FINALLY, making it more efficient.
|
60
69
|
*/
|
61
|
-
|
62
70
|
#ifndef FRT_EXCEPT_H
|
63
71
|
#define FRT_EXCEPT_H
|
64
72
|
|
@@ -70,14 +78,15 @@
|
|
70
78
|
#define EXCEPTION 2
|
71
79
|
#define FERRET_ERROR 2
|
72
80
|
#define IO_ERROR 3
|
73
|
-
#define
|
74
|
-
#define
|
75
|
-
#define
|
76
|
-
#define
|
77
|
-
#define
|
78
|
-
#define
|
79
|
-
#define
|
80
|
-
#define
|
81
|
+
#define FILE_NOT_FOUND_ERROR 4
|
82
|
+
#define ARG_ERROR 5
|
83
|
+
#define EOF_ERROR 6
|
84
|
+
#define UNSUPPORTED_ERROR 7
|
85
|
+
#define STATE_ERROR 8
|
86
|
+
#define PARSE_ERROR 9
|
87
|
+
#define MEM_ERROR 10
|
88
|
+
#define INDEX_ERROR 11
|
89
|
+
#define LOCK_ERROR 12
|
81
90
|
|
82
91
|
extern char *const UNSUPPORTED_ERROR_MSG;
|
83
92
|
extern char *const EOF_ERROR_MSG;
|
@@ -115,6 +124,9 @@ typedef struct xcontext_t
|
|
115
124
|
}\
|
116
125
|
} while (0);
|
117
126
|
|
127
|
+
#define RETURN_EARLY() xpop_context()
|
128
|
+
|
129
|
+
|
118
130
|
#define XFINALLY default: xcontext.in_finally = 1;
|
119
131
|
|
120
132
|
#define XCATCHALL break; default: xcontext.in_finally = 1;
|
data/ext/ferret.c
CHANGED
@@ -158,6 +158,22 @@ void *frt_rb_data_ptr(VALUE val)
|
|
158
158
|
return DATA_PTR(val);
|
159
159
|
}
|
160
160
|
|
161
|
+
char *
|
162
|
+
rs2s(VALUE rstr)
|
163
|
+
{
|
164
|
+
return (char *)(RSTRING(rstr)->ptr ? RSTRING(rstr)->ptr : EMPTY_STRING);
|
165
|
+
}
|
166
|
+
|
167
|
+
char *
|
168
|
+
nstrdup(VALUE rstr)
|
169
|
+
{
|
170
|
+
char *old = rs2s(rstr);
|
171
|
+
int len = RSTRING(rstr)->len;
|
172
|
+
char *new = ALLOC_N(char, len + 1);
|
173
|
+
memcpy(new, old, len + 1);
|
174
|
+
return new;
|
175
|
+
}
|
176
|
+
|
161
177
|
char *
|
162
178
|
frt_field(VALUE rfield)
|
163
179
|
{
|
@@ -165,7 +181,7 @@ frt_field(VALUE rfield)
|
|
165
181
|
case T_SYMBOL:
|
166
182
|
return rb_id2name(SYM2ID(rfield));
|
167
183
|
case T_STRING:
|
168
|
-
return
|
184
|
+
return rs2s(rfield);
|
169
185
|
default:
|
170
186
|
rb_raise(rb_eArgError, "field name must be a symbol");
|
171
187
|
}
|
@@ -292,6 +308,7 @@ void Init_ferret_ext(void)
|
|
292
308
|
{
|
293
309
|
VALUE cParseError;
|
294
310
|
VALUE cStateError;
|
311
|
+
VALUE cFileNotFoundError;
|
295
312
|
|
296
313
|
/* initialize object map */
|
297
314
|
object_map = h_new(&value_hash, &value_eq, NULL, NULL);
|
@@ -336,10 +353,14 @@ void Init_ferret_ext(void)
|
|
336
353
|
rb_define_class_under(mFerret, "ParseError", rb_eStandardError);
|
337
354
|
cStateError =
|
338
355
|
rb_define_class_under(mFerret, "StateError", rb_eStandardError);
|
356
|
+
cFileNotFoundError =
|
357
|
+
rb_define_class_under(rb_cObject, "FileNotFoundError", rb_eIOError);
|
339
358
|
|
340
359
|
error_map = rb_hash_new();
|
341
360
|
rb_hash_aset(error_map, rb_intern("Exception"), rb_eStandardError);
|
342
361
|
rb_hash_aset(error_map, rb_intern("IO Error"), rb_eIOError);
|
362
|
+
rb_hash_aset(error_map, rb_intern("File Not Found Error"),
|
363
|
+
cFileNotFoundError);
|
343
364
|
rb_hash_aset(error_map, rb_intern("Argument Error"), rb_eArgError);
|
344
365
|
rb_hash_aset(error_map, rb_intern("End-of-File Error"), rb_eEOFError);
|
345
366
|
rb_hash_aset(error_map, rb_intern("Unsupported Function Error"),
|
data/ext/ferret.h
CHANGED
@@ -65,7 +65,8 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
|
65
65
|
extern void *frt_rb_data_ptr(VALUE val);
|
66
66
|
extern char * frt_field(VALUE rfield);
|
67
67
|
extern VALUE frt_get_term(const char *field, const char *term);
|
68
|
-
|
68
|
+
extern char *rs2s(VALUE rstr);
|
69
|
+
extern char *nstrdup(VALUE rstr);
|
69
70
|
#define Frt_Make_Struct(klass)\
|
70
71
|
rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
|
71
72
|
|
data/ext/fs_store.c
CHANGED
@@ -202,7 +202,11 @@ static void fs_clear_all(Store *store)
|
|
202
202
|
*/
|
203
203
|
static void fs_destroy(Store *store)
|
204
204
|
{
|
205
|
-
|
205
|
+
TRY
|
206
|
+
fs_clear_locks(store);
|
207
|
+
XCATCHALL
|
208
|
+
HANDLED();
|
209
|
+
XENDTRY
|
206
210
|
free(store->dir.path);
|
207
211
|
store_destroy(store);
|
208
212
|
}
|
@@ -320,7 +324,8 @@ static InStream *fs_open_input(Store *store, const char *filename)
|
|
320
324
|
char path[MAX_FILE_PATH];
|
321
325
|
int fd = open(join_path(path, store->dir.path, filename), O_RDONLY | O_BINARY);
|
322
326
|
if (fd < 0) {
|
323
|
-
RAISE(
|
327
|
+
RAISE(FILE_NOT_FOUND_ERROR,
|
328
|
+
"tried to open \"%s\" but it doesn't exist: <%s>",
|
324
329
|
path, strerror(errno));
|
325
330
|
}
|
326
331
|
is = is_new();
|
@@ -344,17 +349,9 @@ static int fs_lock_obtain(Lock *lock)
|
|
344
349
|
open(lock->name, O_CREAT | O_EXCL | O_RDWR,
|
345
350
|
S_IRUSR | S_IWUSR)) < 0) && (trys > 0)) {
|
346
351
|
|
347
|
-
|
348
|
-
|
349
|
-
#endif
|
350
|
-
trys--;
|
351
|
-
/* sleep for 10 milliseconds
|
352
|
-
clock_t start = clock();
|
352
|
+
/* sleep for 10 milliseconds */
|
353
|
+
micro_sleep(10000);
|
353
354
|
trys--;
|
354
|
-
|
355
|
-
while (((double)(clock() - start) / CLOCKS_PER_SEC) < 0.01) {
|
356
|
-
}
|
357
|
-
*/
|
358
355
|
}
|
359
356
|
if (f >= 0) {
|
360
357
|
close(f);
|
data/ext/global.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <assert.h>
|
7
7
|
#include <math.h>
|
8
8
|
#include <ctype.h>
|
9
|
+
#include <unistd.h>
|
9
10
|
|
10
11
|
const char *EMPTY_STRING = "";
|
11
12
|
|
@@ -55,6 +56,34 @@ int icmp_risky(const void *p1, const void *p2)
|
|
55
56
|
return (*(int *)p1) - *((int *)p2);
|
56
57
|
}
|
57
58
|
|
59
|
+
unsigned int *imalloc(unsigned int value)
|
60
|
+
{
|
61
|
+
unsigned int *p = ALLOC(unsigned int);
|
62
|
+
*p = value;
|
63
|
+
return p;
|
64
|
+
}
|
65
|
+
|
66
|
+
unsigned long *lmalloc(unsigned long value)
|
67
|
+
{
|
68
|
+
unsigned long *p = ALLOC(unsigned long);
|
69
|
+
*p = value;
|
70
|
+
return p;
|
71
|
+
}
|
72
|
+
|
73
|
+
f_u32 *u32malloc(f_u32 value)
|
74
|
+
{
|
75
|
+
f_u32 *p = ALLOC(f_u32);
|
76
|
+
*p = value;
|
77
|
+
return p;
|
78
|
+
}
|
79
|
+
|
80
|
+
f_u64 *u64malloc(f_u64 value)
|
81
|
+
{
|
82
|
+
f_u64 *p = ALLOC(f_u64);
|
83
|
+
*p = value;
|
84
|
+
return p;
|
85
|
+
}
|
86
|
+
|
58
87
|
|
59
88
|
#ifndef RUBY_BINDINGS
|
60
89
|
/* frt_exit: print error message and exit */
|
@@ -327,3 +356,54 @@ void dummy_free(void *p)
|
|
327
356
|
{
|
328
357
|
(void)p; /* suppress unused argument warning */
|
329
358
|
}
|
359
|
+
|
360
|
+
#ifdef FRT_IS_C99
|
361
|
+
extern void usleep(unsigned long usec);
|
362
|
+
#endif
|
363
|
+
|
364
|
+
extern void micro_sleep(const int micro_seconds)
|
365
|
+
{
|
366
|
+
#ifdef POSH_OS_WIN32
|
367
|
+
Sleep(micro_seconds / 1000);
|
368
|
+
#else
|
369
|
+
usleep(micro_seconds);
|
370
|
+
#endif
|
371
|
+
}
|
372
|
+
|
373
|
+
typedef struct FreeMe
|
374
|
+
{
|
375
|
+
void *p;
|
376
|
+
free_ft free_func;
|
377
|
+
} FreeMe;
|
378
|
+
|
379
|
+
static FreeMe *free_mes = NULL;
|
380
|
+
static int free_mes_size = 0;
|
381
|
+
static int free_mes_capa = 0;
|
382
|
+
|
383
|
+
void register_for_cleanup(void *p, free_ft free_func)
|
384
|
+
{
|
385
|
+
FreeMe *free_me;
|
386
|
+
if (free_mes_capa == 0) {
|
387
|
+
free_mes_capa = 16;
|
388
|
+
free_mes = ALLOC_N(FreeMe, free_mes_capa);
|
389
|
+
}
|
390
|
+
else if (free_mes_capa <= free_mes_size) {
|
391
|
+
free_mes_capa *= 2;
|
392
|
+
REALLOC_N(free_mes, FreeMe, free_mes_capa);
|
393
|
+
}
|
394
|
+
free_me = free_mes + free_mes_size++;
|
395
|
+
free_me->p = p;
|
396
|
+
free_me->free_func = free_func;
|
397
|
+
}
|
398
|
+
|
399
|
+
void do_clean_up()
|
400
|
+
{
|
401
|
+
int i;
|
402
|
+
for (i = 0; i < free_mes_size; i++) {
|
403
|
+
FreeMe *free_me = free_mes + i;
|
404
|
+
free_me->free_func(free_me->p);
|
405
|
+
}
|
406
|
+
free(free_mes);
|
407
|
+
free_mes = NULL;
|
408
|
+
free_mes_size = free_mes_capa = 0;
|
409
|
+
}
|
data/ext/global.h
CHANGED
@@ -61,6 +61,11 @@ typedef void (*free_ft)(void *key);
|
|
61
61
|
extern char *progname();
|
62
62
|
extern void setprogname(const char *str);
|
63
63
|
|
64
|
+
extern unsigned int *imalloc(unsigned int value);
|
65
|
+
extern unsigned long *lmalloc(unsigned long value);
|
66
|
+
extern f_u32 *u32malloc(f_u32 value);
|
67
|
+
extern f_u64 *u64malloc(f_u64 value);
|
68
|
+
|
64
69
|
extern void *emalloc(size_t n);
|
65
70
|
extern void *ecalloc(size_t n);
|
66
71
|
extern void *erealloc(void *ptr, size_t n);
|
@@ -82,6 +87,11 @@ extern char *dbl_to_s(char *buf, double num);
|
|
82
87
|
extern char *strfmt(const char *fmt, ...);
|
83
88
|
extern char *vstrfmt(const char *fmt, va_list args);
|
84
89
|
|
90
|
+
extern void micro_sleep(const int micro_seconds);
|
91
|
+
|
92
|
+
extern void register_for_cleanup(void *p, free_ft free_func);
|
93
|
+
extern void do_clean_up();
|
94
|
+
|
85
95
|
/**
|
86
96
|
* A dummy function which can be passed to functions which expect a free
|
87
97
|
* function such as h_new() if you don't want the free functions to do anything.
|