isomorfeus-ferret 0.12.7 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +54 -1
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -5,8 +5,7 @@
|
|
5
5
|
|
6
6
|
#define FRT_BV_INIT_CAPA 256
|
7
7
|
|
8
|
-
typedef struct FrtBitVector
|
9
|
-
{
|
8
|
+
typedef struct FrtBitVector {
|
10
9
|
/** The bits are held in an array of 32-bit integers */
|
11
10
|
frt_u32 *bits;
|
12
11
|
|
@@ -26,6 +25,7 @@ typedef struct FrtBitVector
|
|
26
25
|
|
27
26
|
bool extends_as_ones : 1;
|
28
27
|
int ref_cnt;
|
28
|
+
VALUE rbv;
|
29
29
|
} FrtBitVector;
|
30
30
|
|
31
31
|
/**
|
@@ -74,9 +74,7 @@ extern void frt_bv_destroy(FrtBitVector *bv);
|
|
74
74
|
* cause any bugs in this code but could cause problems if users are relying
|
75
75
|
* on the fact that size is accurate.
|
76
76
|
*/
|
77
|
-
static FRT_ATTR_ALWAYS_INLINE
|
78
|
-
void frt_bv_set_value(FrtBitVector *bv, int bit, bool value)
|
79
|
-
{
|
77
|
+
static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_value(FrtBitVector *bv, int bit, bool value) {
|
80
78
|
frt_u32 *word_p;
|
81
79
|
int word = bit >> 5;
|
82
80
|
frt_u32 bitmask = 1 << (bit & 31);
|
@@ -120,9 +118,7 @@ void frt_bv_set_value(FrtBitVector *bv, int bit, bool value)
|
|
120
118
|
* @param bv the FrtBitVector to set the bit in
|
121
119
|
* @param index the index of the bit to set
|
122
120
|
*/
|
123
|
-
static FRT_ATTR_ALWAYS_INLINE
|
124
|
-
void frt_bv_set(FrtBitVector *bv, int bit)
|
125
|
-
{
|
121
|
+
static FRT_ATTR_ALWAYS_INLINE void frt_bv_set(FrtBitVector *bv, int bit) {
|
126
122
|
frt_bv_set_value(bv, bit, 1);
|
127
123
|
}
|
128
124
|
|
@@ -153,9 +149,7 @@ void frt_bv_set(FrtBitVector *bv, int bit)
|
|
153
149
|
* @param bv the FrtBitVector to set the bit in
|
154
150
|
* @param index the index of the bit to set
|
155
151
|
*/
|
156
|
-
static FRT_ATTR_ALWAYS_INLINE
|
157
|
-
void frt_bv_set_fast(FrtBitVector *bv, int bit)
|
158
|
-
{
|
152
|
+
static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_fast(FrtBitVector *bv, int bit) {
|
159
153
|
bv->count++;
|
160
154
|
bv->size = bit + 1;
|
161
155
|
bv->bits[bit >> 5] |= (1 << (bit & 31));
|
@@ -170,9 +164,7 @@ void frt_bv_set_fast(FrtBitVector *bv, int bit)
|
|
170
164
|
* @param index the index of the bit to check
|
171
165
|
* @return 1 if the bit was set, 0 otherwise
|
172
166
|
*/
|
173
|
-
static FRT_ATTR_ALWAYS_INLINE
|
174
|
-
int frt_bv_get(FrtBitVector *bv, int bit)
|
175
|
-
{
|
167
|
+
static FRT_ATTR_ALWAYS_INLINE int frt_bv_get(FrtBitVector *bv, int bit) {
|
176
168
|
/* out of range, so the bit was never stored: report the value the vector extends with */
|
177
169
|
if (unlikely(bit >= bv->size)) {
|
178
170
|
return bv->extends_as_ones;
|
@@ -188,9 +180,7 @@ int frt_bv_get(FrtBitVector *bv, int bit)
|
|
188
180
|
* @param bv the FrtBitVector to unset the bit in
|
189
181
|
* @param index the index of the bit to unset
|
190
182
|
*/
|
191
|
-
static FRT_ATTR_ALWAYS_INLINE
|
192
|
-
void frt_bv_unset(FrtBitVector *bv, int bit)
|
193
|
-
{
|
183
|
+
static FRT_ATTR_ALWAYS_INLINE void frt_bv_unset(FrtBitVector *bv, int bit) {
|
194
184
|
frt_bv_set_value(bv, bit, 0);
|
195
185
|
}
|
196
186
|
|
@@ -213,9 +203,7 @@ extern void frt_bv_clear(FrtBitVector *bv);
|
|
213
203
|
* @return the number of set bits in the FrtBitVector. FrtBitVector.count is also
|
214
204
|
* set
|
215
205
|
*/
|
216
|
-
static FRT_ATTR_ALWAYS_INLINE
|
217
|
-
int frt_bv_recount(FrtBitVector *bv)
|
218
|
-
{
|
206
|
+
static FRT_ATTR_ALWAYS_INLINE int frt_bv_recount(FrtBitVector *bv) {
|
219
207
|
unsigned int extra = ((bv->size & 31) >> 3) + 1;
|
220
208
|
unsigned int len = bv->size >> 5;
|
221
209
|
unsigned int idx, count = 0;
|
@@ -230,8 +218,7 @@ int frt_bv_recount(FrtBitVector *bv)
|
|
230
218
|
case 2: count += frt_count_zeros(bv->bits[idx] | 0xffff00ff);
|
231
219
|
case 1: count += frt_count_zeros(bv->bits[idx] | 0xffffff00);
|
232
220
|
}
|
233
|
-
}
|
234
|
-
else {
|
221
|
+
} else {
|
235
222
|
for (idx = 0; idx < len; ++idx) {
|
236
223
|
count += frt_count_ones(bv->bits[idx]);
|
237
224
|
}
|
@@ -263,9 +250,7 @@ extern void frt_bv_scan_reset(FrtBitVector *bv);
|
|
263
250
|
* @param bv the FrtBitVector to scan
|
264
251
|
* @return the next set bit's index or -1 if no more bits are set
|
265
252
|
*/
|
266
|
-
static FRT_ATTR_ALWAYS_INLINE
|
267
|
-
int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
|
268
|
-
{
|
253
|
+
static FRT_ATTR_ALWAYS_INLINE int frt_bv_scan_next_from(FrtBitVector *bv, const int bit) {
|
269
254
|
frt_u32 pos = bit >> 5;
|
270
255
|
frt_u32 word = bv->bits[pos];
|
271
256
|
|
@@ -276,8 +261,7 @@ int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
|
|
276
261
|
word &= (frt_u32)~0 << (bit & 31);
|
277
262
|
if (word) {
|
278
263
|
goto done;
|
279
|
-
}
|
280
|
-
else {
|
264
|
+
} else {
|
281
265
|
frt_u32 word_size = FRT_TO_WORD(bv->size);
|
282
266
|
for (pos++; pos < word_size; ++pos)
|
283
267
|
{
|
@@ -286,7 +270,7 @@ int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
|
|
286
270
|
}
|
287
271
|
}
|
288
272
|
return -1;
|
289
|
-
|
273
|
+
done:
|
290
274
|
return bv->curr_bit = (pos << 5) + frt_count_trailing_zeros(word);
|
291
275
|
}
|
292
276
|
|
@@ -313,9 +297,7 @@ int frt_bv_scan_next(FrtBitVector *bv)
|
|
313
297
|
* @param bv the FrtBitVector to scan
|
314
298
|
* @return the next unset bit's index or -1 if no more bits are unset
|
315
299
|
*/
|
316
|
-
static FRT_ATTR_ALWAYS_INLINE
|
317
|
-
int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
|
318
|
-
{
|
300
|
+
static FRT_ATTR_ALWAYS_INLINE int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit) {
|
319
301
|
frt_u32 pos = bit >> 5;
|
320
302
|
frt_u32 word = bv->bits[pos];
|
321
303
|
|
@@ -326,8 +308,7 @@ int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
|
|
326
308
|
word |= (1 << (bit & 31)) - 1;
|
327
309
|
if (~word) {
|
328
310
|
goto done;
|
329
|
-
}
|
330
|
-
else {
|
311
|
+
} else {
|
331
312
|
frt_u32 word_size = FRT_TO_WORD(bv->size);
|
332
313
|
for (pos++; pos < word_size; ++pos)
|
333
314
|
{
|
@@ -336,7 +317,7 @@ int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
|
|
336
317
|
}
|
337
318
|
}
|
338
319
|
return -1;
|
339
|
-
|
320
|
+
done:
|
340
321
|
return bv->curr_bit = (pos << 5) + frt_count_trailing_ones(word);
|
341
322
|
}
|
342
323
|
|
@@ -372,12 +353,9 @@ extern int frt_bv_eq(FrtBitVector *bv1, FrtBitVector *bv2);
|
|
372
353
|
*/
|
373
354
|
extern unsigned long long frt_bv_hash(FrtBitVector *bv);
|
374
355
|
|
375
|
-
static FRT_ATTR_ALWAYS_INLINE
|
376
|
-
void frt_bv_capa(FrtBitVector *bv, int capa, int size)
|
377
|
-
{
|
356
|
+
static FRT_ATTR_ALWAYS_INLINE void frt_bv_capa(FrtBitVector *bv, int capa, int size) {
|
378
357
|
int word_size = FRT_TO_WORD(size);
|
379
|
-
if (bv->capa < capa)
|
380
|
-
{
|
358
|
+
if (bv->capa < capa) {
|
381
359
|
FRT_REALLOC_N(bv->bits, frt_u32, capa);
|
382
360
|
bv->capa = capa;
|
383
361
|
memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
|
@@ -408,11 +386,11 @@ void frt_bv_capa(FrtBitVector *bv, int capa, int size)
|
|
408
386
|
int i; \
|
409
387
|
int a_wsz = FRT_TO_WORD(a->size); \
|
410
388
|
int b_wsz = FRT_TO_WORD(b->size); \
|
411
|
-
int max_size = FRT_MAX(a->size, b->size);
|
412
|
-
int min_size = FRT_MIN(a->size, b->size);
|
389
|
+
int max_size = FRT_MAX(a->size, b->size); \
|
390
|
+
int min_size = FRT_MIN(a->size, b->size); \
|
413
391
|
int max_word_size = FRT_TO_WORD(max_size); \
|
414
392
|
int min_word_size = FRT_TO_WORD(min_size); \
|
415
|
-
int capa = FRT_MAX(frt_round2(max_word_size), 4);
|
393
|
+
int capa = FRT_MAX(frt_round2(max_word_size), 4); \
|
416
394
|
\
|
417
395
|
bv->extends_as_ones = (a->extends_as_ones op b->extends_as_ones); \
|
418
396
|
frt_bv_capa(bv, capa, max_size); \
|
@@ -432,33 +410,22 @@ void frt_bv_capa(FrtBitVector *bv, int capa, int size)
|
|
432
410
|
frt_bv_recount(bv); \
|
433
411
|
} while(0)
|
434
412
|
|
435
|
-
static FRT_ATTR_ALWAYS_INLINE
|
436
|
-
FrtBitVector *frt_bv_and_i(FrtBitVector *bv,
|
437
|
-
FrtBitVector *a, FrtBitVector *b)
|
438
|
-
{
|
413
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
|
439
414
|
FRT_BV_OP(bv, a, b, &, frt_bv_and_ext);
|
440
415
|
return bv;
|
441
416
|
}
|
442
417
|
|
443
|
-
static FRT_ATTR_ALWAYS_INLINE
|
444
|
-
FrtBitVector *frt_bv_or_i(FrtBitVector *bv,
|
445
|
-
FrtBitVector *a, FrtBitVector *b)
|
446
|
-
{
|
418
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
|
447
419
|
FRT_BV_OP(bv, a, b, |, frt_bv_or_ext);
|
448
420
|
return bv;
|
449
421
|
}
|
450
422
|
|
451
|
-
static FRT_ATTR_ALWAYS_INLINE
|
452
|
-
FrtBitVector *frt_bv_xor_i(FrtBitVector *bv,
|
453
|
-
FrtBitVector *a, FrtBitVector *b)
|
454
|
-
{
|
423
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
|
455
424
|
FRT_BV_OP(bv, a, b, ^, frt_bv_xor_ext);
|
456
425
|
return bv;
|
457
426
|
}
|
458
427
|
|
459
|
-
static FRT_ATTR_ALWAYS_INLINE
|
460
|
-
FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1)
|
461
|
-
{
|
428
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1) {
|
462
429
|
int i;
|
463
430
|
int word_size = FRT_TO_WORD(bv1->size);
|
464
431
|
int capa = FRT_MAX(frt_round2(word_size), 4);
|
@@ -484,9 +451,7 @@ FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1)
|
|
484
451
|
* @param bv2 second FrtBitVector to AND
|
485
452
|
* @return A FrtBitVector with all bits set that are set in both bv1 and bv2
|
486
453
|
*/
|
487
|
-
static FRT_ATTR_ALWAYS_INLINE
|
488
|
-
FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2)
|
489
|
-
{
|
454
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2) {
|
490
455
|
return frt_bv_and_i(frt_bv_new(), bv1, bv2);
|
491
456
|
}
|
492
457
|
|
@@ -498,9 +463,7 @@ FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
498
463
|
* @param bv2 second FrtBitVector to OR
|
499
464
|
* @return A FrtBitVector with all bits set that are set in either bv1 or bv2
|
500
465
|
*/
|
501
|
-
static FRT_ATTR_ALWAYS_INLINE
|
502
|
-
FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2)
|
503
|
-
{
|
466
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2) {
|
504
467
|
return frt_bv_or_i(frt_bv_new(), bv1, bv2);
|
505
468
|
}
|
506
469
|
|
@@ -513,9 +476,7 @@ FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
513
476
|
* @param bv2 second FrtBitVector to XOR
|
514
477
|
* @return A FrtBitVector with all bits set that differ between bv1 and bv2
|
515
478
|
*/
|
516
|
-
static FRT_ATTR_ALWAYS_INLINE
|
517
|
-
FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2)
|
518
|
-
{
|
479
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2) {
|
519
480
|
return frt_bv_xor_i(frt_bv_new(), bv1, bv2);
|
520
481
|
}
|
521
482
|
|
@@ -525,9 +486,7 @@ FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
525
486
|
* @param bv FrtBitVector to flip
|
526
487
|
* @return A new FrtBitVector with all the bits of bv flipped
|
527
488
|
*/
|
528
|
-
static FRT_ATTR_ALWAYS_INLINE
|
529
|
-
FrtBitVector *frt_bv_not(FrtBitVector *bv)
|
530
|
-
{
|
489
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not(FrtBitVector *bv) {
|
531
490
|
return frt_bv_not_i(frt_bv_new(), bv);
|
532
491
|
}
|
533
492
|
|
@@ -539,9 +498,7 @@ FrtBitVector *frt_bv_not(FrtBitVector *bv)
|
|
539
498
|
* @return A FrtBitVector
|
540
499
|
* @return bv1 with all bits set that were set in both bv1 and bv2
|
541
500
|
*/
|
542
|
-
static FRT_ATTR_ALWAYS_INLINE
|
543
|
-
FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
544
|
-
{
|
501
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2) {
|
545
502
|
return frt_bv_and_i(bv1, bv1, bv2);
|
546
503
|
}
|
547
504
|
|
@@ -552,9 +509,7 @@ FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
552
509
|
* @param bv2 second FrtBitVector to OR
|
553
510
|
* @return bv1
|
554
511
|
*/
|
555
|
-
static FRT_ATTR_ALWAYS_INLINE
|
556
|
-
FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
557
|
-
{
|
512
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2) {
|
558
513
|
return frt_bv_or_i(bv1, bv1, bv2);
|
559
514
|
}
|
560
515
|
|
@@ -565,9 +520,7 @@ FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
565
520
|
* @param bv2 second FrtBitVector to XOR
|
566
521
|
* @return bv1
|
567
522
|
*/
|
568
|
-
static FRT_ATTR_ALWAYS_INLINE
|
569
|
-
FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
570
|
-
{
|
523
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2) {
|
571
524
|
return frt_bv_xor_i(bv1, bv1, bv2);
|
572
525
|
}
|
573
526
|
|
@@ -577,9 +530,7 @@ FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2)
|
|
577
530
|
* @param bv FrtBitVector to flip
|
578
531
|
* @return +bv+ with all its bits flipped
|
579
532
|
*/
|
580
|
-
static FRT_ATTR_ALWAYS_INLINE
|
581
|
-
FrtBitVector *frt_bv_not_x(FrtBitVector *bv)
|
582
|
-
{
|
533
|
+
static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not_x(FrtBitVector *bv) {
|
583
534
|
return frt_bv_not_i(bv, bv);
|
584
535
|
}
|
585
536
|
|
@@ -7,39 +7,38 @@
|
|
7
7
|
*
|
8
8
|
****************************************************************************/
|
9
9
|
|
10
|
-
FrtDocField *frt_df_new(
|
11
|
-
{
|
10
|
+
FrtDocField *frt_df_new(ID name) {
|
12
11
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
13
12
|
df->name = name;
|
14
13
|
df->size = 0;
|
15
14
|
df->capa = FRT_DF_INIT_CAPA;
|
16
15
|
df->data = FRT_ALLOC_N(char *, df->capa);
|
17
16
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
17
|
+
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
18
18
|
df->destroy_data = false;
|
19
19
|
df->boost = 1.0f;
|
20
20
|
return df;
|
21
21
|
}
|
22
22
|
|
23
|
-
FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len)
|
24
|
-
{
|
23
|
+
FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
|
25
24
|
if (df->size >= df->capa) {
|
26
25
|
df->capa <<= 2;
|
27
26
|
FRT_REALLOC_N(df->data, char *, df->capa);
|
28
27
|
FRT_REALLOC_N(df->lengths, int, df->capa);
|
28
|
+
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
29
29
|
}
|
30
30
|
df->data[df->size] = data;
|
31
31
|
df->lengths[df->size] = len;
|
32
|
+
df->encodings[df->size] = encoding;
|
32
33
|
df->size++;
|
33
34
|
return df;
|
34
35
|
}
|
35
36
|
|
36
|
-
FrtDocField *frt_df_add_data(FrtDocField *df, char *data)
|
37
|
-
|
38
|
-
return frt_df_add_data_len(df, data, strlen(data));
|
37
|
+
FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
|
38
|
+
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
39
|
}
|
40
40
|
|
41
|
-
void frt_df_destroy(FrtDocField *df)
|
42
|
-
{
|
41
|
+
void frt_df_destroy(FrtDocField *df) {
|
43
42
|
if (df->destroy_data) {
|
44
43
|
int i;
|
45
44
|
for (i = 0; i < df->size; i++) {
|
@@ -48,15 +47,16 @@ void frt_df_destroy(FrtDocField *df)
|
|
48
47
|
}
|
49
48
|
free(df->data);
|
50
49
|
free(df->lengths);
|
50
|
+
free(df->encodings);
|
51
51
|
free(df);
|
52
52
|
}
|
53
53
|
|
54
54
|
/*
|
55
55
|
* Format for one item is: name: "data"
|
56
56
|
* for more items : name: ["data", "data", "data"]
|
57
|
+
* internally used for testing, thus encoding can be ignored
|
57
58
|
*/
|
58
|
-
char *frt_df_to_s(FrtDocField *df)
|
59
|
-
{
|
59
|
+
char *frt_df_to_s(FrtDocField *df) {
|
60
60
|
const char *df_name = rb_id2name(df->name);
|
61
61
|
int i, len = 0, namelen = strlen(df_name);
|
62
62
|
char *str, *s;
|
@@ -94,8 +94,7 @@ char *frt_df_to_s(FrtDocField *df)
|
|
94
94
|
*
|
95
95
|
****************************************************************************/
|
96
96
|
|
97
|
-
FrtDocument *frt_doc_new()
|
98
|
-
{
|
97
|
+
FrtDocument *frt_doc_new(void) {
|
99
98
|
FrtDocument *doc = FRT_ALLOC(FrtDocument);
|
100
99
|
doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
|
101
100
|
doc->size = 0;
|
@@ -105,8 +104,7 @@ FrtDocument *frt_doc_new()
|
|
105
104
|
return doc;
|
106
105
|
}
|
107
106
|
|
108
|
-
FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df)
|
109
|
-
{
|
107
|
+
FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
|
110
108
|
if (!frt_h_set_safe(doc->field_dict, (void *)df->name, df)) {
|
111
109
|
FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
|
112
110
|
rb_id2name(df->name));
|
@@ -120,15 +118,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df)
|
|
120
118
|
return df;
|
121
119
|
}
|
122
120
|
|
123
|
-
FrtDocField *frt_doc_get_field(FrtDocument *doc,
|
124
|
-
{
|
121
|
+
FrtDocField *frt_doc_get_field(FrtDocument *doc, ID name) {
|
125
122
|
return (FrtDocField *)frt_h_get(doc->field_dict, (void *)name);
|
126
123
|
}
|
127
124
|
|
128
|
-
void frt_doc_destroy(FrtDocument *doc)
|
129
|
-
{
|
125
|
+
void frt_doc_destroy(FrtDocument *doc) {
|
130
126
|
frt_h_destroy(doc->field_dict);
|
131
127
|
free(doc->fields);
|
132
128
|
free(doc);
|
133
129
|
}
|
134
|
-
|
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
#include "frt_global.h"
|
5
5
|
#include "frt_hash.h"
|
6
|
+
#include <ruby/encoding.h>
|
6
7
|
|
7
8
|
/****************************************************************************
|
8
9
|
*
|
@@ -11,21 +12,21 @@
|
|
11
12
|
****************************************************************************/
|
12
13
|
|
13
14
|
#define FRT_DF_INIT_CAPA 1
|
14
|
-
typedef struct FrtDocField
|
15
|
-
|
16
|
-
FrtSymbol name;
|
15
|
+
typedef struct FrtDocField {
|
16
|
+
ID name;
|
17
17
|
int size;
|
18
18
|
int capa;
|
19
19
|
int *lengths;
|
20
|
+
rb_encoding **encodings; /* used for processing */
|
20
21
|
char **data;
|
21
22
|
float boost;
|
23
|
+
FrtCompressionType compression;
|
22
24
|
bool destroy_data : 1;
|
23
|
-
bool is_compressed : 1;
|
24
25
|
} FrtDocField;
|
25
26
|
|
26
|
-
extern FrtDocField *frt_df_new(
|
27
|
-
extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data);
|
28
|
-
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len);
|
27
|
+
extern FrtDocField *frt_df_new(ID name);
|
28
|
+
extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
|
29
|
+
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
|
29
30
|
extern void frt_df_destroy(FrtDocField *df);
|
30
31
|
extern char *frt_df_to_s(FrtDocField *df);
|
31
32
|
|
@@ -36,8 +37,7 @@ extern char *frt_df_to_s(FrtDocField *df);
|
|
36
37
|
****************************************************************************/
|
37
38
|
|
38
39
|
#define FRT_DOC_INIT_CAPA 8
|
39
|
-
typedef struct FrtDocument
|
40
|
-
{
|
40
|
+
typedef struct FrtDocument {
|
41
41
|
FrtHash *field_dict;
|
42
42
|
int size;
|
43
43
|
int capa;
|
@@ -47,7 +47,7 @@ typedef struct FrtDocument
|
|
47
47
|
|
48
48
|
extern FrtDocument *frt_doc_new();
|
49
49
|
extern FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df);
|
50
|
-
extern FrtDocField *frt_doc_get_field(FrtDocument *doc,
|
50
|
+
extern FrtDocField *frt_doc_get_field(FrtDocument *doc, ID name);
|
51
51
|
extern void frt_doc_destroy(FrtDocument *doc);
|
52
52
|
|
53
53
|
#endif
|
@@ -32,13 +32,11 @@ char frt_xmsg_buffer_final[FRT_XMSG_BUFFER_FINAL_SIZE];
|
|
32
32
|
static frt_thread_key_t exception_stack_key;
|
33
33
|
static frt_thread_once_t exception_stack_key_once = FRT_THREAD_ONCE_INIT;
|
34
34
|
|
35
|
-
static void exception_stack_alloc(void)
|
36
|
-
{
|
35
|
+
static void exception_stack_alloc(void) {
|
37
36
|
frt_thread_key_create(&exception_stack_key, NULL);
|
38
37
|
}
|
39
38
|
|
40
|
-
void frt_xpush_context(frt_xcontext_t *context)
|
41
|
-
{
|
39
|
+
void frt_xpush_context(frt_xcontext_t *context) {
|
42
40
|
frt_xcontext_t *top_context;
|
43
41
|
frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
|
44
42
|
top_context = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
|
@@ -48,18 +46,14 @@ void frt_xpush_context(frt_xcontext_t *context)
|
|
48
46
|
context->in_finally = false;
|
49
47
|
}
|
50
48
|
|
51
|
-
static void frt_xraise_context(frt_xcontext_t *context,
|
52
|
-
volatile int excode,
|
53
|
-
const char *const msg)
|
54
|
-
{
|
49
|
+
static void frt_xraise_context(frt_xcontext_t *context, volatile int excode, const char *const msg) {
|
55
50
|
context->msg = msg;
|
56
51
|
context->excode = excode;
|
57
52
|
context->handled = false;
|
58
53
|
longjmp(context->jbuf, excode);
|
59
54
|
}
|
60
55
|
|
61
|
-
void frt_xraise(int excode, const char *const msg)
|
62
|
-
{
|
56
|
+
void frt_xraise(int excode, const char *const msg) {
|
63
57
|
frt_xcontext_t *top_context;
|
64
58
|
frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
|
65
59
|
top_context = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
|
@@ -77,8 +71,7 @@ void frt_xraise(int excode, const char *const msg)
|
|
77
71
|
}
|
78
72
|
}
|
79
73
|
|
80
|
-
void frt_xpop_context()
|
81
|
-
{
|
74
|
+
void frt_xpop_context(void) {
|
82
75
|
frt_xcontext_t *top_cxt, *context;
|
83
76
|
frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
|
84
77
|
top_cxt = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
|
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "frt_field_index.h"
|
3
3
|
|
4
|
+
#undef close
|
5
|
+
|
4
6
|
/***************************************************************************
|
5
7
|
*
|
6
8
|
* FrtFieldIndex
|
@@ -30,9 +32,7 @@ static void field_index_destroy(void *p)
|
|
30
32
|
free(self);
|
31
33
|
}
|
32
34
|
|
33
|
-
FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir,
|
34
|
-
const FrtFieldIndexClass *klass)
|
35
|
-
{
|
35
|
+
FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldIndexClass *klass) {
|
36
36
|
int length = 0;
|
37
37
|
FrtTermEnum *volatile te = NULL;
|
38
38
|
FrtTermDocEnum *volatile tde = NULL;
|
@@ -10,11 +10,11 @@
|
|
10
10
|
***************************************************************************/
|
11
11
|
|
12
12
|
typedef struct FrtStringIndex {
|
13
|
-
int
|
13
|
+
int size;
|
14
14
|
long *index;
|
15
15
|
char **values;
|
16
|
-
int
|
17
|
-
int
|
16
|
+
int v_size;
|
17
|
+
int v_capa;
|
18
18
|
} FrtStringIndex;
|
19
19
|
|
20
20
|
typedef struct FrtFieldIndexClass FrtFieldIndexClass;
|
@@ -26,9 +26,9 @@ struct FrtFieldIndexClass {
|
|
26
26
|
};
|
27
27
|
|
28
28
|
typedef struct FrtFieldIndex {
|
29
|
-
|
29
|
+
ID field;
|
30
30
|
const FrtFieldIndexClass *klass;
|
31
|
-
void
|
31
|
+
void *index;
|
32
32
|
} FrtFieldIndex;
|
33
33
|
|
34
34
|
extern const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS;
|
@@ -36,7 +36,6 @@ extern const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS;
|
|
36
36
|
extern const FrtFieldIndexClass FRT_STRING_FIELD_INDEX_CLASS;
|
37
37
|
extern const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS;
|
38
38
|
|
39
|
-
extern FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir,
|
40
|
-
const FrtFieldIndexClass *klass);
|
39
|
+
extern FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldIndexClass *klass);
|
41
40
|
|
42
41
|
#endif
|