ferret 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -5
- data/TODO +2 -1
- data/ext/analysis.c +838 -177
- data/ext/analysis.h +55 -7
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +8 -5
- data/ext/compound_io.c +132 -96
- data/ext/document.c +58 -28
- data/ext/except.c +59 -0
- data/ext/except.h +88 -0
- data/ext/ferret.c +47 -3
- data/ext/ferret.h +3 -0
- data/ext/field.c +15 -9
- data/ext/filter.c +1 -1
- data/ext/fs_store.c +215 -34
- data/ext/global.c +72 -3
- data/ext/global.h +4 -3
- data/ext/hash.c +44 -3
- data/ext/hash.h +9 -0
- data/ext/header.h +58 -0
- data/ext/inc/except.h +88 -0
- data/ext/inc/lang.h +23 -13
- data/ext/ind.c +16 -10
- data/ext/index.h +2 -22
- data/ext/index_io.c +3 -11
- data/ext/index_rw.c +245 -193
- data/ext/lang.h +23 -13
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/modules.h +162 -0
- data/ext/q_boolean.c +34 -21
- data/ext/q_const_score.c +6 -12
- data/ext/q_filtered_query.c +206 -0
- data/ext/q_fuzzy.c +18 -15
- data/ext/q_match_all.c +3 -7
- data/ext/q_multi_phrase.c +10 -14
- data/ext/q_parser.c +29 -2
- data/ext/q_phrase.c +14 -21
- data/ext/q_prefix.c +15 -12
- data/ext/q_range.c +30 -28
- data/ext/q_span.c +13 -21
- data/ext/q_term.c +17 -26
- data/ext/r_analysis.c +693 -21
- data/ext/r_doc.c +11 -12
- data/ext/r_index_io.c +4 -1
- data/ext/r_qparser.c +21 -2
- data/ext/r_search.c +285 -18
- data/ext/ram_store.c +5 -2
- data/ext/search.c +11 -17
- data/ext/search.h +21 -45
- data/ext/similarity.h +67 -0
- data/ext/sort.c +30 -25
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +325 -0
- data/ext/store.c +34 -2
- data/ext/tags +2953 -0
- data/ext/term.c +21 -15
- data/ext/termdocs.c +5 -3
- data/ext/utilities.c +446 -0
- data/ext/vector.c +27 -13
- data/lib/ferret/document/document.rb +1 -1
- data/lib/ferret/index/index.rb +44 -6
- data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
- data/lib/rferret.rb +2 -1
- data/test/test_helper.rb +2 -2
- data/test/unit/analysis/ctc_analyzer.rb +401 -0
- data/test/unit/analysis/ctc_tokenstream.rb +423 -0
- data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
- data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
- data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
- data/test/unit/analysis/tc_analyzer.rb +1 -2
- data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
- data/test/unit/document/rtc_field.rb +28 -0
- data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
- data/test/unit/document/tc_field.rb +82 -12
- data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
- data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
- data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
- data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
- data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
- data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
- data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
- data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
- data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
- data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
- data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
- data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
- data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
- data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
- data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
- data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
- data/test/unit/query_parser/tc_query_parser.rb +24 -16
- data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
- data/test/unit/search/rtc_sort_field.rb +14 -0
- data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
- data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
- data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
- data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
- data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +20 -7
- data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
- data/test/unit/store/rtc_fs_store.rb +62 -0
- data/test/unit/store/rtc_ram_store.rb +15 -0
- data/test/unit/store/rtm_store.rb +150 -0
- data/test/unit/store/rtm_store_lock.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +54 -40
- data/test/unit/store/tc_ram_store.rb +20 -0
- data/test/unit/store/tm_store.rb +30 -146
- data/test/unit/store/tm_store_lock.rb +66 -0
- data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
- data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
- data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
- data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
- data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
- data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
- data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
- data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
- metadata +360 -289
- data/test/unit/document/c_field.rb +0 -98
- data/test/unit/search/c_sort_field.rb +0 -27
- data/test/unit/store/c_fs_store.rb +0 -76
- data/test/unit/store/c_ram_store.rb +0 -35
- data/test/unit/store/m_store.rb +0 -34
- data/test/unit/store/m_store_lock.rb +0 -68
data/ext/global.c
CHANGED
@@ -7,6 +7,9 @@
|
|
7
7
|
#include <math.h>
|
8
8
|
#include <ctype.h>
|
9
9
|
|
10
|
+
static char * const ESTRDUP_MEM_ERROR_MSG = "estrdup failed";
|
11
|
+
static char * const MEM_ERROR_MSG = "malloc failed";
|
12
|
+
|
10
13
|
const char *EMPTY_STRING = "";
|
11
14
|
|
12
15
|
int min3(int a, int b, int c)
|
@@ -153,7 +156,7 @@ char *estrdup(const char *s)
|
|
153
156
|
|
154
157
|
t = (char *)malloc(strlen(s) + 1);
|
155
158
|
if (t == NULL)
|
156
|
-
|
159
|
+
RAISE(MEM_ERROR, ESTRDUP_MEM_ERROR_MSG);
|
157
160
|
|
158
161
|
strcpy(t, s);
|
159
162
|
return t;
|
@@ -165,7 +168,7 @@ void *emalloc(size_t size)
|
|
165
168
|
void *p;
|
166
169
|
p = malloc(size);
|
167
170
|
if (p == NULL)
|
168
|
-
|
171
|
+
RAISE(MEM_ERROR, MEM_ERROR_MSG);
|
169
172
|
return p;
|
170
173
|
}
|
171
174
|
|
@@ -174,7 +177,7 @@ void *erealloc(void *ptr, size_t size)
|
|
174
177
|
void *p;
|
175
178
|
p = realloc(ptr, size);
|
176
179
|
if (p == NULL)
|
177
|
-
|
180
|
+
RAISE(MEM_ERROR, MEM_ERROR_MSG);
|
178
181
|
return p;
|
179
182
|
}
|
180
183
|
|
@@ -217,3 +220,69 @@ void lower_str(char *str)
|
|
217
220
|
str++;
|
218
221
|
}
|
219
222
|
}
|
223
|
+
|
224
|
+
/* strfmt: like sprintf except that it allocates memory for the string */
|
225
|
+
char *strfmt(const char *fmt, ...)
|
226
|
+
{
|
227
|
+
char *string;
|
228
|
+
char *p = (char *)fmt, *q;
|
229
|
+
va_list args;
|
230
|
+
int len = strlen(fmt) + 1;
|
231
|
+
int slen;
|
232
|
+
char *s;
|
233
|
+
long i;
|
234
|
+
double d;
|
235
|
+
|
236
|
+
q = string = ALLOC_N(char, len);
|
237
|
+
|
238
|
+
va_start(args, fmt);
|
239
|
+
while (*p) {
|
240
|
+
if (*p == '%') {
|
241
|
+
p++;
|
242
|
+
switch (*p) {
|
243
|
+
case 's':
|
244
|
+
p++;
|
245
|
+
s = va_arg(args, char *);
|
246
|
+
if (s) {
|
247
|
+
slen = strlen(s);
|
248
|
+
len += slen;
|
249
|
+
*q = 0;
|
250
|
+
REALLOC_N(string, char, len);
|
251
|
+
q = string + strlen(string);
|
252
|
+
sprintf(q, s);
|
253
|
+
q += slen;
|
254
|
+
}
|
255
|
+
continue;
|
256
|
+
case 'f':
|
257
|
+
p++;
|
258
|
+
len += 32;
|
259
|
+
*q = 0;
|
260
|
+
REALLOC_N(string, char, len);
|
261
|
+
q = string + strlen(string);
|
262
|
+
d = va_arg(args, double);
|
263
|
+
dbl_to_s(q, d);
|
264
|
+
q += strlen(q);
|
265
|
+
continue;
|
266
|
+
case 'd':
|
267
|
+
p++;
|
268
|
+
len += 20;
|
269
|
+
*q = 0;
|
270
|
+
REALLOC_N(string, char, len);
|
271
|
+
q = string + strlen(string);
|
272
|
+
i = va_arg(args, long);
|
273
|
+
sprintf(q, "%ld", i);
|
274
|
+
q += strlen(q);
|
275
|
+
continue;
|
276
|
+
default:
|
277
|
+
break;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
*q = *p;
|
281
|
+
p++;
|
282
|
+
q++;
|
283
|
+
}
|
284
|
+
va_end(args);
|
285
|
+
*q = 0;
|
286
|
+
|
287
|
+
return string;
|
288
|
+
}
|
data/ext/global.h
CHANGED
@@ -4,9 +4,6 @@
|
|
4
4
|
#include <stdlib.h>
|
5
5
|
#include <stdio.h>
|
6
6
|
#include <assert.h>
|
7
|
-
#include <pthread.h>
|
8
|
-
#include "lang.h"
|
9
|
-
|
10
7
|
//#define DEBUG
|
11
8
|
#define VALGRIND
|
12
9
|
|
@@ -19,6 +16,9 @@ typedef unsigned int uint;
|
|
19
16
|
|
20
17
|
typedef void (*destroy_func_t)(void *p);
|
21
18
|
|
19
|
+
#include "lang.h"
|
20
|
+
#include "except.h"
|
21
|
+
|
22
22
|
#define MAX_WORD_SIZE 255
|
23
23
|
#define MAX_PATH 1024
|
24
24
|
#define MAX_BUFFER_SIZE 1024
|
@@ -70,4 +70,5 @@ int max3(int a, int b, int c);
|
|
70
70
|
|
71
71
|
char *dbl_to_s(char *buf, double num);
|
72
72
|
void lower_str(char *str);
|
73
|
+
char *strfmt(const char *fmt, ...);
|
73
74
|
#endif
|
data/ext/hash.c
CHANGED
@@ -388,7 +388,8 @@ int h_resize(HshTable *ht, int min_newsize) {
|
|
388
388
|
ht->table = ALLOC_N(HshEntry, newsize);
|
389
389
|
}
|
390
390
|
memset(ht->table, 0, sizeof(HshEntry) * newsize);
|
391
|
-
i = ht->fill;
|
391
|
+
i = ht->used;
|
392
|
+
//i = ht->fill;
|
392
393
|
ht->fill = ht->used;
|
393
394
|
ht->mask = newsize - 1;
|
394
395
|
int j = 0;
|
@@ -399,8 +400,8 @@ int h_resize(HshTable *ht, int min_newsize) {
|
|
399
400
|
he_new->key = he_old->key;
|
400
401
|
he_new->value = he_old->value;
|
401
402
|
i--;
|
402
|
-
} else if (he_old->key == dummy_key) { //dummy entry
|
403
|
-
i--;
403
|
+
//} else if (he_old->key == dummy_key) { //dummy entry
|
404
|
+
// i--;
|
404
405
|
} // else empty entry so nothing to do
|
405
406
|
}
|
406
407
|
if (oldtable != smallcopy && oldtable != ht->smalltable)
|
@@ -444,3 +445,43 @@ int h_has_key(HshTable *ht, const void *key)
|
|
444
445
|
return HASH_KEY_EQUAL;
|
445
446
|
}
|
446
447
|
}
|
448
|
+
|
449
|
+
void h_each(HshTable *ht,
|
450
|
+
void (*each_kv)(void *key, void *value, void *arg),
|
451
|
+
void *arg)
|
452
|
+
{
|
453
|
+
HshEntry *he;
|
454
|
+
int i = ht->used;
|
455
|
+
for (he = ht->table; i > 0; he++) {
|
456
|
+
if (he->value != NULL) {// active entry
|
457
|
+
each_kv(he->key, he->value, arg);
|
458
|
+
i--;
|
459
|
+
}
|
460
|
+
}
|
461
|
+
}
|
462
|
+
|
463
|
+
HshTable *h_clone(HshTable *ht,
|
464
|
+
h_clone_func_t clone_key,
|
465
|
+
h_clone_func_t clone_value)
|
466
|
+
{
|
467
|
+
void *key, *value;
|
468
|
+
HshEntry *he;
|
469
|
+
int i = ht->used;
|
470
|
+
HshTable *ht_clone;
|
471
|
+
if (ht->lookup == &h_lookup_str) {
|
472
|
+
ht_clone = h_new_str(ht->free_key, ht->free_value);
|
473
|
+
} else {
|
474
|
+
ht_clone = h_new(ht->hash, ht->eq, ht->free_key, ht->free_value);
|
475
|
+
}
|
476
|
+
|
477
|
+
for (he = ht->table; i > 0; he++) {
|
478
|
+
if (he->value != NULL) {// active entry
|
479
|
+
key = clone_key ? clone_key(he->key) : he->key;
|
480
|
+
value = clone_value ? clone_value(he->value) : he->value;
|
481
|
+
h_set(ht_clone, key, value);
|
482
|
+
i--;
|
483
|
+
}
|
484
|
+
}
|
485
|
+
return ht_clone;
|
486
|
+
}
|
487
|
+
|
data/ext/hash.h
CHANGED
@@ -74,6 +74,15 @@ int h_set_safe(HshTable *ht, const void *key, void *value);
|
|
74
74
|
int h_has_key(HshTable *ht, const void *key);
|
75
75
|
unsigned int str_hash(const char *const str);
|
76
76
|
|
77
|
+
void h_each(HshTable *ht,
|
78
|
+
void (*each_kv)(void *key, void *value, void *arg),
|
79
|
+
void *arg);
|
80
|
+
|
81
|
+
typedef void *(*h_clone_func_t)(void *val);
|
82
|
+
HshTable *h_clone(HshTable *ht,
|
83
|
+
h_clone_func_t clone_key,
|
84
|
+
h_clone_func_t clone_value);
|
85
|
+
|
77
86
|
void dummy_free(void *p);
|
78
87
|
HshEntry *h_lookup_str(HshTable *ht, register const void *key_p);
|
79
88
|
|
data/ext/header.h
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
#include <limits.h>
|
3
|
+
|
4
|
+
#include "api.h"
|
5
|
+
|
6
|
+
#define MAXINT INT_MAX
|
7
|
+
#define MININT INT_MIN
|
8
|
+
|
9
|
+
#define HEAD 2*sizeof(int)
|
10
|
+
|
11
|
+
#define SIZE(p) ((int *)(p))[-1]
|
12
|
+
#define SET_SIZE(p, n) ((int *)(p))[-1] = n
|
13
|
+
#define CAPACITY(p) ((int *)(p))[-2]
|
14
|
+
|
15
|
+
struct among
|
16
|
+
{ int s_size; /* number of chars in string */
|
17
|
+
symbol * s; /* search string */
|
18
|
+
int substring_i;/* index to longest matching substring */
|
19
|
+
int result; /* result of the lookup */
|
20
|
+
int (* function)(struct SN_env *);
|
21
|
+
};
|
22
|
+
|
23
|
+
extern symbol * create_s(void);
|
24
|
+
extern void lose_s(symbol * p);
|
25
|
+
|
26
|
+
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
|
27
|
+
|
28
|
+
extern int in_grouping_U(struct SN_env * z, unsigned char * s, int min, int max);
|
29
|
+
extern int in_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max);
|
30
|
+
extern int out_grouping_U(struct SN_env * z, unsigned char * s, int min, int max);
|
31
|
+
extern int out_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max);
|
32
|
+
|
33
|
+
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
|
34
|
+
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
|
35
|
+
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
|
36
|
+
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
|
37
|
+
|
38
|
+
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
|
39
|
+
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
|
40
|
+
extern int eq_v(struct SN_env * z, symbol * p);
|
41
|
+
extern int eq_v_b(struct SN_env * z, symbol * p);
|
42
|
+
|
43
|
+
extern int find_among(struct SN_env * z, struct among * v, int v_size);
|
44
|
+
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
|
45
|
+
|
46
|
+
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
|
47
|
+
extern int slice_from_s(struct SN_env * z, int s_size, symbol * s);
|
48
|
+
extern int slice_from_v(struct SN_env * z, symbol * p);
|
49
|
+
extern int slice_del(struct SN_env * z);
|
50
|
+
|
51
|
+
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
|
52
|
+
extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p);
|
53
|
+
|
54
|
+
extern symbol * slice_to(struct SN_env * z, symbol * p);
|
55
|
+
extern symbol * assign_to(struct SN_env * z, symbol * p);
|
56
|
+
|
57
|
+
extern void debug(struct SN_env * z, int number, int line_count);
|
58
|
+
|
data/ext/inc/except.h
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#ifndef FRT_EXCEPT_H
|
2
|
+
#define FRT_EXCEPT_H
|
3
|
+
|
4
|
+
#include <setjmp.h>
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
#define BODY 0
|
8
|
+
#define FINALLY -1
|
9
|
+
#define EXCEPTION 1
|
10
|
+
#define ERROR 1
|
11
|
+
#define IO_ERROR 2
|
12
|
+
#define ARG_ERROR 3
|
13
|
+
#define EOF_ERROR 4
|
14
|
+
#define UNSUPPORTED_ERROR 5
|
15
|
+
#define STATE_ERROR 6
|
16
|
+
#define PARSE_ERROR 7
|
17
|
+
#define MEM_ERROR 8
|
18
|
+
|
19
|
+
typedef struct xcontext_t {
|
20
|
+
jmp_buf jbuf;
|
21
|
+
struct xcontext_t *next;
|
22
|
+
char *msg;
|
23
|
+
volatile int excode;
|
24
|
+
int handled : 1;
|
25
|
+
int in_finally : 1;
|
26
|
+
} xcontext_t;
|
27
|
+
|
28
|
+
RUBY_EXTERN int rb_thread_critical;
|
29
|
+
extern xcontext_t *xtop_context;
|
30
|
+
|
31
|
+
#define TRY\
|
32
|
+
xcontext_t xcontext;\
|
33
|
+
rb_thread_critical = Qtrue;\
|
34
|
+
xcontext.next = xtop_context;\
|
35
|
+
xtop_context = &xcontext;\
|
36
|
+
xcontext.handled = true;\
|
37
|
+
xcontext.in_finally = false;\
|
38
|
+
switch (setjmp(xcontext.jbuf)) {\
|
39
|
+
case BODY:
|
40
|
+
|
41
|
+
|
42
|
+
#define XENDTRY\
|
43
|
+
}\
|
44
|
+
xtop_context = xcontext.next;\
|
45
|
+
if (!xcontext.handled) {\
|
46
|
+
RAISE(xcontext.excode, xcontext.msg);\
|
47
|
+
}\
|
48
|
+
rb_thread_critical = 0;
|
49
|
+
|
50
|
+
#define ENDTRY\
|
51
|
+
}\
|
52
|
+
if (!xcontext.in_finally) {\
|
53
|
+
xtop_context = xcontext.next;\
|
54
|
+
if (!xcontext.handled) {\
|
55
|
+
RAISE(xcontext.excode, xcontext.msg);\
|
56
|
+
}\
|
57
|
+
xcontext.in_finally = 1;\
|
58
|
+
longjmp(xcontext.jbuf, FINALLY);\
|
59
|
+
}\
|
60
|
+
rb_thread_critical = 0;
|
61
|
+
|
62
|
+
#define XFINALLY default: xcontext.in_finally = 1;
|
63
|
+
|
64
|
+
#define XCATCHALL break; default: xcontext.in_finally = 1;
|
65
|
+
|
66
|
+
//fprintf(stderr,"Error occured in %s, %d: %s\n", __FILE__, __LINE__, __func__);
|
67
|
+
#define RAISE(xexcode, xmsg) \
|
68
|
+
do {\
|
69
|
+
if (!xtop_context) {\
|
70
|
+
eprintf(EXCEPTION_CODE, "Error: exception %d not handled: %s", xexcode, xmsg);\
|
71
|
+
} else if (!xtop_context->in_finally) {\
|
72
|
+
xtop_context->msg = xmsg;\
|
73
|
+
xtop_context->excode = xexcode;\
|
74
|
+
xtop_context->handled = false;\
|
75
|
+
longjmp(xtop_context->jbuf, xexcode);\
|
76
|
+
} else if (xtop_context->handled) {\
|
77
|
+
xtop_context->msg = xmsg;\
|
78
|
+
xtop_context->excode = xexcode;\
|
79
|
+
xtop_context->handled = false;\
|
80
|
+
}\
|
81
|
+
} while (0)
|
82
|
+
|
83
|
+
#define HANDLED() xcontext.handled = 1 /* true */
|
84
|
+
|
85
|
+
extern char * const UNSUPPORTED_ERROR_MSG;
|
86
|
+
extern char * const EOF_ERROR_MSG;
|
87
|
+
|
88
|
+
#endif
|
data/ext/inc/lang.h
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#define FRT_LANG_H
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
|
+
#include "hash.h"
|
5
6
|
|
6
7
|
#define FERRET_EXT
|
7
8
|
|
@@ -14,28 +15,37 @@ extern void setprogname(const char *str);
|
|
14
15
|
|
15
16
|
extern VALUE cQueryParseException;
|
16
17
|
|
17
|
-
#define
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
#define EXCEPTION_CODE rb_eException
|
19
|
+
//#define IO_ERROR rb_eIOError
|
20
|
+
//#define ARG_ERROR rb_eArgError
|
21
|
+
//#define EOF_ERROR rb_eEOFError
|
22
|
+
//#define UNSUPPORTED_ERROR rb_eNotImpError
|
23
|
+
//#define STATE_ERROR rb_eException
|
24
|
+
//#define PARSE_ERROR cQueryParseException
|
25
|
+
//#define MEM_ERROR rb_eNoMemError
|
25
26
|
|
26
27
|
typedef void * mutex_t;
|
27
|
-
typedef
|
28
|
+
typedef struct HshTable * thread_key_t;
|
29
|
+
typedef int thread_once_t;
|
28
30
|
#define MUTEX_INITIALIZER NULL
|
29
31
|
#define MUTEX_RECURSIVE_INITIALIZER NULL
|
32
|
+
#define THREAD_ONCE_INIT 1;
|
30
33
|
#define mutex_init(a, b)
|
31
34
|
#define mutex_lock(a)
|
32
35
|
#define mutex_trylock(a)
|
33
36
|
#define mutex_unlock(a)
|
34
37
|
#define mutex_destroy(a)
|
35
|
-
#define thread_key_create(a, b)
|
36
|
-
#define thread_key_delete(a)
|
37
|
-
#define thread_setspecific(a, b)
|
38
|
-
#define thread_getspecific(a)
|
38
|
+
#define thread_key_create(a, b) frt_thread_key_create(a, b)
|
39
|
+
#define thread_key_delete(a) frt_thread_key_delete(a)
|
40
|
+
#define thread_setspecific(a, b) frt_thread_setspecific(a, b)
|
41
|
+
#define thread_getspecific(a) frt_thread_getspecific(a)
|
39
42
|
#define thread_exit(a)
|
43
|
+
#define thread_once(a, b) frt_thread_once(a, b)
|
44
|
+
|
45
|
+
void frt_thread_once(int *once_control, void (*init_routine) (void));
|
46
|
+
void frt_thread_key_create(thread_key_t *key, void (*destr_function) (void *));
|
47
|
+
void frt_thread_key_delete(thread_key_t key);
|
48
|
+
void frt_thread_setspecific(thread_key_t key, const void *pointer);
|
49
|
+
void *frt_thread_getspecific(thread_key_t key);
|
40
50
|
|
41
51
|
#endif
|
data/ext/ind.c
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
+
static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
|
5
|
+
|
4
6
|
static const char *ID_STRING = "id";
|
5
7
|
|
6
8
|
#define INDEX_CLOSE_READER(self) do {\
|
@@ -53,7 +55,7 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
|
53
55
|
self->analyzer = analyzer;
|
54
56
|
self->close_analyzer = false;
|
55
57
|
} else {
|
56
|
-
self->analyzer =
|
58
|
+
self->analyzer = mb_standard_analyzer_create(true);
|
57
59
|
self->close_analyzer = true;
|
58
60
|
}
|
59
61
|
self->use_compound_file = true;
|
@@ -69,6 +71,7 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
|
69
71
|
self->id_field = (char *)ID_STRING;
|
70
72
|
self->def_field = (char *)ID_STRING;
|
71
73
|
self->auto_flush = false;
|
74
|
+
self->check_latest = true;
|
72
75
|
|
73
76
|
self->qp = qp_create(all_fields, def_fields, self->analyzer);
|
74
77
|
/* Index is a convenience class so set qp convenience options */
|
@@ -117,7 +120,7 @@ inline void ensure_writer_open(Index *self)
|
|
117
120
|
inline void ensure_reader_open(Index *self)
|
118
121
|
{
|
119
122
|
if (self->ir) {
|
120
|
-
if (!ir_is_latest(self->ir)) {
|
123
|
+
if (self->check_latest && !ir_is_latest(self->ir)) {
|
121
124
|
INDEX_CLOSE_READER(self);
|
122
125
|
self->ir = ir_open(self->store, false);
|
123
126
|
}
|
@@ -196,7 +199,7 @@ static void inline index_add_doc_i(Index *self, Document *doc)
|
|
196
199
|
td = sea_search(self->sea, q, 0, 1, NULL, NULL);
|
197
200
|
if (td->total_hits > 1) {
|
198
201
|
td_destroy(td);
|
199
|
-
|
202
|
+
RAISE(ARG_ERROR, NON_UNIQUE_KEY_ERROR_MSG);
|
200
203
|
} else if (td->total_hits == 1) {
|
201
204
|
ir_delete_doc(self->ir, td->hits[0]->doc);
|
202
205
|
}
|
@@ -230,7 +233,7 @@ void index_add_doc(Index *self, Document *doc)
|
|
230
233
|
void index_add_string(Index *self, char *str, Analyzer *analyzer)
|
231
234
|
{
|
232
235
|
Document *doc = doc_create();
|
233
|
-
doc_add_field(doc, df_create(self->
|
236
|
+
doc_add_field(doc, df_create(self->def_field, estrdup(str),
|
234
237
|
DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
|
235
238
|
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
236
239
|
else index_add_doc(self, doc);
|
@@ -242,7 +245,7 @@ void index_add_array(Index *self, Array *ary, Analyzer *analyzer)
|
|
242
245
|
int i;
|
243
246
|
Document *doc = doc_create();
|
244
247
|
for (i = 0; i < ary->size; i++) {
|
245
|
-
doc_add_field(doc, df_create(self->
|
248
|
+
doc_add_field(doc, df_create(self->def_field, estrdup(ary->elems[i]),
|
246
249
|
DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
|
247
250
|
}
|
248
251
|
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
@@ -341,11 +344,14 @@ void index_delete_term(Index *self, Term *term)
|
|
341
344
|
mutex_lock(&self->store->ext_mutex);
|
342
345
|
ensure_reader_open(self);
|
343
346
|
tde = ir_term_docs_for(self->ir, term);
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
347
|
+
TRY
|
348
|
+
while (tde->next(tde)) {
|
349
|
+
ir_delete_doc(self->ir, tde->doc_num(tde));
|
350
|
+
AUTOFLUSH_IR;
|
351
|
+
}
|
352
|
+
XFINALLY
|
353
|
+
tde->close(tde);
|
354
|
+
XENDTRY
|
349
355
|
mutex_unlock(&self->store->ext_mutex);
|
350
356
|
}
|
351
357
|
|