isomorfeus-ferret 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -61,8 +61,7 @@ static bool pp_next(PhPos *self) {
|
|
61
61
|
return true;
|
62
62
|
}
|
63
63
|
|
64
|
-
static bool pp_skip_to(PhPos *self, int doc_num)
|
65
|
-
{
|
64
|
+
static bool pp_skip_to(PhPos *self, int doc_num) {
|
66
65
|
FrtTermDocEnum *tpe = self->tpe;
|
67
66
|
assert(tpe);
|
68
67
|
|
@@ -77,8 +76,7 @@ static bool pp_skip_to(PhPos *self, int doc_num)
|
|
77
76
|
return true;
|
78
77
|
}
|
79
78
|
|
80
|
-
static bool pp_next_position(PhPos *self)
|
81
|
-
{
|
79
|
+
static bool pp_next_position(PhPos *self) {
|
82
80
|
FrtTermDocEnum *tpe = self->tpe;
|
83
81
|
self->count--;
|
84
82
|
if (self->count >= 0) { /* read subsequent pos's */
|
@@ -89,16 +87,14 @@ static bool pp_next_position(PhPos *self)
|
|
89
87
|
}
|
90
88
|
}
|
91
89
|
|
92
|
-
static bool pp_first_position(PhPos *self)
|
93
|
-
{
|
90
|
+
static bool pp_first_position(PhPos *self) {
|
94
91
|
FrtTermDocEnum *tpe = self->tpe;
|
95
92
|
self->count = tpe->freq(tpe); /* read first pos */
|
96
93
|
return pp_next_position(self);
|
97
94
|
}
|
98
95
|
|
99
96
|
#define PP_pp(p) (*(PhPos **)p)
|
100
|
-
static int pp_cmp(const void *const p1, const void *const p2)
|
101
|
-
{
|
97
|
+
static int pp_cmp(const void *const p1, const void *const p2) {
|
102
98
|
int cmp = PP_pp(p1)->doc - PP_pp(p2)->doc;
|
103
99
|
if (cmp == 0) {
|
104
100
|
cmp = PP_pp(p1)->position - PP_pp(p2)->position;
|
@@ -109,13 +105,11 @@ static int pp_cmp(const void *const p1, const void *const p2)
|
|
109
105
|
return cmp;
|
110
106
|
}
|
111
107
|
|
112
|
-
static int pp_pos_cmp(const void *const p1, const void *const p2)
|
113
|
-
{
|
108
|
+
static int pp_pos_cmp(const void *const p1, const void *const p2) {
|
114
109
|
return PP_pp(p1)->position - PP_pp(p2)->position;
|
115
110
|
}
|
116
111
|
|
117
|
-
static bool pp_less_than(const PhPos *pp1, const PhPos *pp2)
|
118
|
-
{
|
112
|
+
static bool pp_less_than(const PhPos *pp1, const PhPos *pp2) {
|
119
113
|
if (pp1->position == pp2->position) {
|
120
114
|
return pp1->offset < pp2->offset;
|
121
115
|
} else {
|
@@ -123,16 +117,14 @@ static bool pp_less_than(const PhPos *pp1, const PhPos *pp2)
|
|
123
117
|
}
|
124
118
|
}
|
125
119
|
|
126
|
-
static void pp_destroy(PhPos *pp)
|
127
|
-
{
|
120
|
+
static void pp_destroy(PhPos *pp) {
|
128
121
|
if (pp->tpe) {
|
129
122
|
pp->tpe->close(pp->tpe);
|
130
123
|
}
|
131
124
|
free(pp);
|
132
125
|
}
|
133
126
|
|
134
|
-
static PhPos *pp_new(FrtTermDocEnum *tpe, int offset)
|
135
|
-
{
|
127
|
+
static PhPos *pp_new(FrtTermDocEnum *tpe, int offset) {
|
136
128
|
PhPos *self = FRT_ALLOC(PhPos);
|
137
129
|
|
138
130
|
self->tpe = tpe;
|
@@ -148,8 +140,7 @@ static PhPos *pp_new(FrtTermDocEnum *tpe, int offset)
|
|
148
140
|
|
149
141
|
#define PhSc(scorer) ((PhraseScorer *)(scorer))
|
150
142
|
|
151
|
-
typedef struct PhraseScorer
|
152
|
-
{
|
143
|
+
typedef struct PhraseScorer {
|
153
144
|
FrtScorer super;
|
154
145
|
float (*phrase_freq)(FrtScorer *self);
|
155
146
|
float freq;
|
@@ -165,8 +156,7 @@ typedef struct PhraseScorer
|
|
165
156
|
bool check_repeats : 1;
|
166
157
|
} PhraseScorer;
|
167
158
|
|
168
|
-
static void phsc_init(PhraseScorer *phsc)
|
169
|
-
{
|
159
|
+
static void phsc_init(PhraseScorer *phsc) {
|
170
160
|
int i;
|
171
161
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
172
162
|
if (!(phsc->more = pp_next(phsc->phrase_pos[i]))) break;
|
@@ -179,8 +169,7 @@ static void phsc_init(PhraseScorer *phsc)
|
|
179
169
|
}
|
180
170
|
}
|
181
171
|
|
182
|
-
static bool phsc_do_next(FrtScorer *self)
|
183
|
-
{
|
172
|
+
static bool phsc_do_next(FrtScorer *self) {
|
184
173
|
PhraseScorer *phsc = PhSc(self);
|
185
174
|
const int pp_cnt = phsc->pp_cnt;
|
186
175
|
int pp_first_idx = phsc->pp_first_idx;
|
@@ -220,8 +209,7 @@ static bool phsc_do_next(FrtScorer *self)
|
|
220
209
|
return false;
|
221
210
|
}
|
222
211
|
|
223
|
-
static float phsc_score(FrtScorer *self)
|
224
|
-
{
|
212
|
+
static float phsc_score(FrtScorer *self) {
|
225
213
|
PhraseScorer *phsc = PhSc(self);
|
226
214
|
float raw_score = frt_sim_tf(self->similarity, phsc->freq) * phsc->value;
|
227
215
|
/* normalize */
|
@@ -230,8 +218,7 @@ static float phsc_score(FrtScorer *self)
|
|
230
218
|
phsc->norms[self->doc]);
|
231
219
|
}
|
232
220
|
|
233
|
-
static bool phsc_next(FrtScorer *self)
|
234
|
-
{
|
221
|
+
static bool phsc_next(FrtScorer *self) {
|
235
222
|
PhraseScorer *phsc = PhSc(self);
|
236
223
|
if (phsc->first_time) {
|
237
224
|
phsc_init(phsc);
|
@@ -243,8 +230,7 @@ static bool phsc_next(FrtScorer *self)
|
|
243
230
|
return phsc_do_next(self);
|
244
231
|
}
|
245
232
|
|
246
|
-
static bool phsc_skip_to(FrtScorer *self, int doc_num)
|
247
|
-
{
|
233
|
+
static bool phsc_skip_to(FrtScorer *self, int doc_num) {
|
248
234
|
PhraseScorer *phsc = PhSc(self);
|
249
235
|
int i;
|
250
236
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
@@ -261,8 +247,7 @@ static bool phsc_skip_to(FrtScorer *self, int doc_num)
|
|
261
247
|
return phsc_do_next(self);
|
262
248
|
}
|
263
249
|
|
264
|
-
static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num)
|
265
|
-
{
|
250
|
+
static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num) {
|
266
251
|
PhraseScorer *phsc = PhSc(self);
|
267
252
|
float phrase_freq;
|
268
253
|
|
@@ -273,8 +258,7 @@ static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num)
|
|
273
258
|
"tf(phrase_freq=%f)", phrase_freq);
|
274
259
|
}
|
275
260
|
|
276
|
-
static void phsc_destroy(FrtScorer *self)
|
277
|
-
{
|
261
|
+
static void phsc_destroy(FrtScorer *self) {
|
278
262
|
PhraseScorer *phsc = PhSc(self);
|
279
263
|
int i;
|
280
264
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
@@ -289,8 +273,7 @@ static FrtScorer *phsc_new(FrtWeight *weight,
|
|
289
273
|
FrtPhrasePosition *positions, int pos_cnt,
|
290
274
|
FrtSimilarity *similarity,
|
291
275
|
frt_uchar *norms,
|
292
|
-
int slop)
|
293
|
-
{
|
276
|
+
int slop) {
|
294
277
|
int i;
|
295
278
|
FrtScorer *self = frt_scorer_new(PhraseScorer, similarity);
|
296
279
|
FrtHashSet *term_set = NULL;
|
@@ -342,8 +325,7 @@ static FrtScorer *phsc_new(FrtWeight *weight,
|
|
342
325
|
* ExactPhraseScorer
|
343
326
|
***************************************************************************/
|
344
327
|
|
345
|
-
static float ephsc_phrase_freq(FrtScorer *self)
|
346
|
-
{
|
328
|
+
static float ephsc_phrase_freq(FrtScorer *self) {
|
347
329
|
PhraseScorer *phsc = PhSc(self);
|
348
330
|
int i;
|
349
331
|
int pp_first_idx = 0;
|
@@ -387,8 +369,7 @@ static float ephsc_phrase_freq(FrtScorer *self)
|
|
387
369
|
static FrtScorer *exact_phrase_scorer_new(FrtWeight *weight,
|
388
370
|
FrtTermDocEnum **term_pos_enum,
|
389
371
|
FrtPhrasePosition *positions, int pp_cnt,
|
390
|
-
FrtSimilarity *similarity, frt_uchar *norms)
|
391
|
-
{
|
372
|
+
FrtSimilarity *similarity, frt_uchar *norms) {
|
392
373
|
FrtScorer *self = phsc_new(weight,
|
393
374
|
term_pos_enum,
|
394
375
|
positions,
|
@@ -407,8 +388,7 @@ static FrtScorer *exact_phrase_scorer_new(FrtWeight *weight,
|
|
407
388
|
|
408
389
|
static bool sphsc_check_repeats(PhPos *pp,
|
409
390
|
PhPos **positions,
|
410
|
-
const int p_cnt)
|
411
|
-
{
|
391
|
+
const int p_cnt) {
|
412
392
|
int j;
|
413
393
|
for (j = 0; j < p_cnt; j++) {
|
414
394
|
PhPos *ppj = positions[j];
|
@@ -432,8 +412,7 @@ static bool sphsc_check_repeats(PhPos *pp,
|
|
432
412
|
return true;
|
433
413
|
}
|
434
414
|
|
435
|
-
static float sphsc_phrase_freq(FrtScorer *self)
|
436
|
-
{
|
415
|
+
static float sphsc_phrase_freq(FrtScorer *self) {
|
437
416
|
PhraseScorer *phsc = PhSc(self);
|
438
417
|
PhPos *pp;
|
439
418
|
FrtPriorityQueue *pq = frt_pq_new(phsc->pp_cnt, (frt_lt_ft)&pp_less_than, NULL);
|
@@ -494,8 +473,7 @@ static FrtScorer *sloppy_phrase_scorer_new(FrtWeight *weight,
|
|
494
473
|
FrtTermDocEnum **term_pos_enum,
|
495
474
|
FrtPhrasePosition *positions,
|
496
475
|
int pp_cnt, FrtSimilarity *similarity,
|
497
|
-
int slop, frt_uchar *norms)
|
498
|
-
{
|
476
|
+
int slop, frt_uchar *norms) {
|
499
477
|
FrtScorer *self = phsc_new(weight,
|
500
478
|
term_pos_enum,
|
501
479
|
positions,
|
@@ -514,13 +492,11 @@ static FrtScorer *sloppy_phrase_scorer_new(FrtWeight *weight,
|
|
514
492
|
*
|
515
493
|
***************************************************************************/
|
516
494
|
|
517
|
-
static char *phw_to_s(FrtWeight *self)
|
518
|
-
{
|
495
|
+
static char *phw_to_s(FrtWeight *self) {
|
519
496
|
return frt_strfmt("PhraseWeight(%f)", self->value);
|
520
497
|
}
|
521
498
|
|
522
|
-
static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
523
|
-
{
|
499
|
+
static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir) {
|
524
500
|
int i;
|
525
501
|
FrtScorer *phsc = NULL;
|
526
502
|
FrtPhraseQuery *phq = PhQ(self->query);
|
@@ -561,8 +537,7 @@ static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
|
561
537
|
return phsc;
|
562
538
|
}
|
563
539
|
|
564
|
-
static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num)
|
565
|
-
{
|
540
|
+
static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num) {
|
566
541
|
FrtExplanation *expl;
|
567
542
|
FrtExplanation *idf_expl1;
|
568
543
|
FrtExplanation *idf_expl2;
|
@@ -659,16 +634,14 @@ static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_
|
|
659
634
|
if (query_expl->value == 1.0) {
|
660
635
|
frt_expl_destroy(expl);
|
661
636
|
return field_expl;
|
662
|
-
}
|
663
|
-
else {
|
637
|
+
} else {
|
664
638
|
expl->value = (query_expl->value * field_expl->value);
|
665
639
|
frt_expl_add_detail(expl, field_expl);
|
666
640
|
return expl;
|
667
641
|
}
|
668
642
|
}
|
669
643
|
|
670
|
-
static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher)
|
671
|
-
{
|
644
|
+
static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher) {
|
672
645
|
FrtWeight *self = w_new(FrtWeight, query);
|
673
646
|
|
674
647
|
self->scorer = &phw_scorer;
|
@@ -690,8 +663,7 @@ static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher)
|
|
690
663
|
***************************************************************************/
|
691
664
|
|
692
665
|
/* ** TVPosEnum ** */
|
693
|
-
typedef struct TVPosEnum
|
694
|
-
{
|
666
|
+
typedef struct TVPosEnum {
|
695
667
|
int index;
|
696
668
|
int size;
|
697
669
|
int offset;
|
@@ -699,20 +671,17 @@ typedef struct TVPosEnum
|
|
699
671
|
int positions[1];
|
700
672
|
} TVPosEnum;
|
701
673
|
|
702
|
-
static bool tvpe_next(TVPosEnum *self)
|
703
|
-
{
|
674
|
+
static bool tvpe_next(TVPosEnum *self) {
|
704
675
|
if (++(self->index) < self->size) {
|
705
676
|
self->pos = self->positions[self->index] - self->offset;
|
706
677
|
return true;
|
707
|
-
}
|
708
|
-
else {
|
678
|
+
} else {
|
709
679
|
self->pos = -1;
|
710
680
|
return false;
|
711
681
|
}
|
712
682
|
}
|
713
683
|
|
714
|
-
static int tvpe_skip_to(TVPosEnum *self, int position)
|
715
|
-
{
|
684
|
+
static int tvpe_skip_to(TVPosEnum *self, int position) {
|
716
685
|
int i;
|
717
686
|
int search_pos = position + self->offset;
|
718
687
|
for (i = self->index + 1; i < self->size; i++) {
|
@@ -729,13 +698,11 @@ static int tvpe_skip_to(TVPosEnum *self, int position)
|
|
729
698
|
return true;
|
730
699
|
}
|
731
700
|
|
732
|
-
static bool tvpe_lt(TVPosEnum *tvpe1, TVPosEnum *tvpe2)
|
733
|
-
{
|
701
|
+
static bool tvpe_lt(TVPosEnum *tvpe1, TVPosEnum *tvpe2) {
|
734
702
|
return tvpe1->pos < tvpe2->pos;
|
735
703
|
}
|
736
704
|
|
737
|
-
static TVPosEnum *tvpe_new(int *positions, int size, int offset)
|
738
|
-
{
|
705
|
+
static TVPosEnum *tvpe_new(int *positions, int size, int offset) {
|
739
706
|
TVPosEnum *self = (TVPosEnum*)frt_emalloc(sizeof(TVPosEnum) + size*sizeof(int));
|
740
707
|
memcpy(self->positions, positions, size * sizeof(int));
|
741
708
|
self->size = size;
|
@@ -745,8 +712,7 @@ static TVPosEnum *tvpe_new(int *positions, int size, int offset)
|
|
745
712
|
return self;
|
746
713
|
}
|
747
714
|
|
748
|
-
static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int offset)
|
749
|
-
{
|
715
|
+
static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int offset) {
|
750
716
|
int i, total_positions = 0;
|
751
717
|
FrtPriorityQueue *tvpe_pq = frt_pq_new(t_cnt, (frt_lt_ft)tvpe_lt, &free);
|
752
718
|
TVPosEnum *self = NULL;
|
@@ -787,8 +753,7 @@ static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int
|
|
787
753
|
return self;
|
788
754
|
}
|
789
755
|
|
790
|
-
static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offset)
|
791
|
-
{
|
756
|
+
static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offset) {
|
792
757
|
TVPosEnum *tvpe = NULL;
|
793
758
|
if (t_cnt == 1) {
|
794
759
|
FrtTVTerm *tv_term = frt_tv_get_tv_term(tv, terms[0]);
|
@@ -801,8 +766,7 @@ static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offse
|
|
801
766
|
return tvpe;
|
802
767
|
}
|
803
768
|
|
804
|
-
static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv)
|
805
|
-
{
|
769
|
+
static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
806
770
|
if (tv->field == PhQ(self)->field) {
|
807
771
|
const int pos_cnt = PhQ(self)->pos_cnt;
|
808
772
|
int i;
|
@@ -872,8 +836,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
872
836
|
if (tvpe && ((i == 0 && tvpe_next(tvpe))
|
873
837
|
|| tvpe_skip_to(tvpe, tvpe_a[i-1]->pos))) {
|
874
838
|
tvpe_a[i] = tvpe;
|
875
|
-
}
|
876
|
-
else {
|
839
|
+
} else {
|
877
840
|
done = true;
|
878
841
|
free(tvpe);
|
879
842
|
break;
|
@@ -889,8 +852,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
889
852
|
last = first;
|
890
853
|
first_index = FRT_NEXT_NUM(first_index, pos_cnt);
|
891
854
|
first = tvpe_a[first_index];
|
892
|
-
}
|
893
|
-
else {
|
855
|
+
} else {
|
894
856
|
done = true;
|
895
857
|
break;
|
896
858
|
}
|
@@ -917,8 +879,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
917
879
|
|
918
880
|
#define PhQ_INIT_CAPA 4
|
919
881
|
|
920
|
-
static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set)
|
921
|
-
{
|
882
|
+
static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set) {
|
922
883
|
FrtPhraseQuery *phq = PhQ(self);
|
923
884
|
int i, j;
|
924
885
|
for (i = 0; i < phq->pos_cnt; i++) {
|
@@ -948,8 +909,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
948
909
|
if (phq->pos_cnt == 0) {
|
949
910
|
if (default_field != phq->field) {
|
950
911
|
return frt_strfmt("%s:\"\"", field_name);
|
951
|
-
}
|
952
|
-
else {
|
912
|
+
} else {
|
953
913
|
return frt_estrdup("\"\"");
|
954
914
|
}
|
955
915
|
}
|
@@ -988,8 +948,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
988
948
|
pos = positions[i].pos;
|
989
949
|
if (pos == last_pos) {
|
990
950
|
buffer[buf_index - 1] = '&';
|
991
|
-
}
|
992
|
-
else {
|
951
|
+
} else {
|
993
952
|
for (j = last_pos; j < pos - 1; j++) {
|
994
953
|
memcpy(buffer + buf_index, "<> ", 3);
|
995
954
|
buf_index += 3;
|
@@ -1026,8 +985,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
1026
985
|
return buffer;
|
1027
986
|
}
|
1028
987
|
|
1029
|
-
static void phq_destroy(FrtQuery *self)
|
1030
|
-
{
|
988
|
+
static void phq_destroy(FrtQuery *self) {
|
1031
989
|
FrtPhraseQuery *phq = PhQ(self);
|
1032
990
|
int i;
|
1033
991
|
for (i = 0; i < phq->pos_cnt; i++) {
|
@@ -1037,8 +995,7 @@ static void phq_destroy(FrtQuery *self)
|
|
1037
995
|
frt_q_destroy_i(self);
|
1038
996
|
}
|
1039
997
|
|
1040
|
-
static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
1041
|
-
{
|
998
|
+
static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
1042
999
|
FrtPhraseQuery *phq = PhQ(self);
|
1043
1000
|
(void)ir;
|
1044
1001
|
if (phq->pos_cnt == 1) {
|
@@ -1049,8 +1006,7 @@ static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
1049
1006
|
FrtQuery *tq = frt_tq_new(phq->field, terms[0]);
|
1050
1007
|
tq->boost = self->boost;
|
1051
1008
|
return tq;
|
1052
|
-
}
|
1053
|
-
else {
|
1009
|
+
} else {
|
1054
1010
|
FrtQuery *q = frt_multi_tq_new(phq->field);
|
1055
1011
|
int i;
|
1056
1012
|
for (i = 0; i < t_cnt; i++) {
|
@@ -1065,8 +1021,7 @@ static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
1065
1021
|
}
|
1066
1022
|
}
|
1067
1023
|
|
1068
|
-
static unsigned long long phq_hash(FrtQuery *self)
|
1069
|
-
{
|
1024
|
+
static unsigned long long phq_hash(FrtQuery *self) {
|
1070
1025
|
int i, j;
|
1071
1026
|
FrtPhraseQuery *phq = PhQ(self);
|
1072
1027
|
unsigned long long hash = frt_str_hash(rb_id2name(phq->field));
|
@@ -1080,8 +1035,7 @@ static unsigned long long phq_hash(FrtQuery *self)
|
|
1080
1035
|
return (hash ^ phq->slop);
|
1081
1036
|
}
|
1082
1037
|
|
1083
|
-
static int phq_eq(FrtQuery *self, FrtQuery *o)
|
1084
|
-
{
|
1038
|
+
static int phq_eq(FrtQuery *self, FrtQuery *o) {
|
1085
1039
|
int i, j;
|
1086
1040
|
FrtPhraseQuery *phq1 = PhQ(self);
|
1087
1041
|
FrtPhraseQuery *phq2 = PhQ(o);
|
@@ -1134,8 +1088,7 @@ FrtQuery *frt_phq_new(ID field) {
|
|
1134
1088
|
return frt_phq_init(self, field);
|
1135
1089
|
}
|
1136
1090
|
|
1137
|
-
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
1138
|
-
{
|
1091
|
+
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position) {
|
1139
1092
|
FrtPhraseQuery *phq = PhQ(self);
|
1140
1093
|
int index = phq->pos_cnt;
|
1141
1094
|
FrtPhrasePosition *pp;
|
@@ -1150,33 +1103,28 @@ void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
|
1150
1103
|
phq->pos_cnt++;
|
1151
1104
|
}
|
1152
1105
|
|
1153
|
-
void frt_phq_add_term(FrtQuery *self, const char *term, int pos_inc)
|
1154
|
-
{
|
1106
|
+
void frt_phq_add_term(FrtQuery *self, const char *term, int pos_inc) {
|
1155
1107
|
FrtPhraseQuery *phq = PhQ(self);
|
1156
1108
|
int position;
|
1157
1109
|
if (phq->pos_cnt == 0) {
|
1158
1110
|
position = 0;
|
1159
|
-
}
|
1160
|
-
else {
|
1111
|
+
} else {
|
1161
1112
|
position = phq->positions[phq->pos_cnt - 1].pos + pos_inc;
|
1162
1113
|
}
|
1163
1114
|
frt_phq_add_term_abs(self, term, position);
|
1164
1115
|
}
|
1165
1116
|
|
1166
|
-
void frt_phq_append_multi_term(FrtQuery *self, const char *term)
|
1167
|
-
{
|
1117
|
+
void frt_phq_append_multi_term(FrtQuery *self, const char *term) {
|
1168
1118
|
FrtPhraseQuery *phq = PhQ(self);
|
1169
1119
|
int index = phq->pos_cnt - 1;
|
1170
1120
|
|
1171
1121
|
if (index < 0) {
|
1172
1122
|
frt_phq_add_term(self, term, 0);
|
1173
|
-
}
|
1174
|
-
else {
|
1123
|
+
} else {
|
1175
1124
|
frt_ary_push(phq->positions[index].terms, frt_estrdup(term));
|
1176
1125
|
}
|
1177
1126
|
}
|
1178
1127
|
|
1179
|
-
void frt_phq_set_slop(FrtQuery *self, int slop)
|
1180
|
-
{
|
1128
|
+
void frt_phq_set_slop(FrtQuery *self, int slop) {
|
1181
1129
|
PhQ(self)->slop = slop;
|
1182
1130
|
}
|
@@ -60,23 +60,20 @@ static char *range_to_s(FrtRange *range, ID default_field, float boost) {
|
|
60
60
|
return buffer;
|
61
61
|
}
|
62
62
|
|
63
|
-
static void range_destroy(FrtRange *range)
|
64
|
-
{
|
63
|
+
static void range_destroy(FrtRange *range) {
|
65
64
|
free(range->lower_term);
|
66
65
|
free(range->upper_term);
|
67
66
|
free(range);
|
68
67
|
}
|
69
68
|
|
70
|
-
static unsigned long long range_hash(FrtRange *filt)
|
71
|
-
{
|
69
|
+
static unsigned long long range_hash(FrtRange *filt) {
|
72
70
|
return filt->include_lower | (filt->include_upper << 1)
|
73
71
|
| ((frt_str_hash(rb_id2name(filt->field))
|
74
72
|
^ (filt->lower_term ? frt_str_hash(filt->lower_term) : 0)
|
75
73
|
^ (filt->upper_term ? frt_str_hash(filt->upper_term) : 0)) << 2);
|
76
74
|
}
|
77
75
|
|
78
|
-
static int range_eq(FrtRange *filt, FrtRange *o)
|
79
|
-
{
|
76
|
+
static int range_eq(FrtRange *filt, FrtRange *o) {
|
80
77
|
if ((filt->lower_term && !o->lower_term) || (!filt->lower_term && o->lower_term)) { return false; }
|
81
78
|
if ((filt->upper_term && !o->upper_term) || (!filt->upper_term && o->upper_term)) { return false; }
|
82
79
|
return ((filt->field == o->field)
|
@@ -149,8 +146,7 @@ static FrtRange *trange_new(ID field, const char *lower_term, const char *upper_
|
|
149
146
|
FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
|
150
147
|
" numbers \"%lg\" < \"%lg\"", upper_num, lower_num);
|
151
148
|
}
|
152
|
-
}
|
153
|
-
else {
|
149
|
+
} else {
|
154
150
|
if (upper_term && lower_term &&
|
155
151
|
(strcmp(upper_term, lower_term) < 0)) {
|
156
152
|
FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
|
@@ -182,16 +178,14 @@ static void frt_rfilt_destroy_i(FrtFilter *filt) {
|
|
182
178
|
frt_filt_destroy_i(filt);
|
183
179
|
}
|
184
180
|
|
185
|
-
static char *frt_rfilt_to_s(FrtFilter *filt)
|
186
|
-
{
|
181
|
+
static char *frt_rfilt_to_s(FrtFilter *filt) {
|
187
182
|
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
188
183
|
char *rfstr = frt_strfmt("RangeFilter< %s >", rstr);
|
189
184
|
free(rstr);
|
190
185
|
return rfstr;
|
191
186
|
}
|
192
187
|
|
193
|
-
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
194
|
-
{
|
188
|
+
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
195
189
|
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
|
196
190
|
FrtRange *range = RF(filt)->range;
|
197
191
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|