isomorfeus-ferret 0.17.2 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -61,8 +61,7 @@ static bool pp_next(PhPos *self) {
|
|
61
61
|
return true;
|
62
62
|
}
|
63
63
|
|
64
|
-
static bool pp_skip_to(PhPos *self, int doc_num)
|
65
|
-
{
|
64
|
+
static bool pp_skip_to(PhPos *self, int doc_num) {
|
66
65
|
FrtTermDocEnum *tpe = self->tpe;
|
67
66
|
assert(tpe);
|
68
67
|
|
@@ -77,8 +76,7 @@ static bool pp_skip_to(PhPos *self, int doc_num)
|
|
77
76
|
return true;
|
78
77
|
}
|
79
78
|
|
80
|
-
static bool pp_next_position(PhPos *self)
|
81
|
-
{
|
79
|
+
static bool pp_next_position(PhPos *self) {
|
82
80
|
FrtTermDocEnum *tpe = self->tpe;
|
83
81
|
self->count--;
|
84
82
|
if (self->count >= 0) { /* read subsequent pos's */
|
@@ -89,16 +87,14 @@ static bool pp_next_position(PhPos *self)
|
|
89
87
|
}
|
90
88
|
}
|
91
89
|
|
92
|
-
static bool pp_first_position(PhPos *self)
|
93
|
-
{
|
90
|
+
static bool pp_first_position(PhPos *self) {
|
94
91
|
FrtTermDocEnum *tpe = self->tpe;
|
95
92
|
self->count = tpe->freq(tpe); /* read first pos */
|
96
93
|
return pp_next_position(self);
|
97
94
|
}
|
98
95
|
|
99
96
|
#define PP_pp(p) (*(PhPos **)p)
|
100
|
-
static int pp_cmp(const void *const p1, const void *const p2)
|
101
|
-
{
|
97
|
+
static int pp_cmp(const void *const p1, const void *const p2) {
|
102
98
|
int cmp = PP_pp(p1)->doc - PP_pp(p2)->doc;
|
103
99
|
if (cmp == 0) {
|
104
100
|
cmp = PP_pp(p1)->position - PP_pp(p2)->position;
|
@@ -109,13 +105,11 @@ static int pp_cmp(const void *const p1, const void *const p2)
|
|
109
105
|
return cmp;
|
110
106
|
}
|
111
107
|
|
112
|
-
static int pp_pos_cmp(const void *const p1, const void *const p2)
|
113
|
-
{
|
108
|
+
static int pp_pos_cmp(const void *const p1, const void *const p2) {
|
114
109
|
return PP_pp(p1)->position - PP_pp(p2)->position;
|
115
110
|
}
|
116
111
|
|
117
|
-
static bool pp_less_than(const PhPos *pp1, const PhPos *pp2)
|
118
|
-
{
|
112
|
+
static bool pp_less_than(const PhPos *pp1, const PhPos *pp2) {
|
119
113
|
if (pp1->position == pp2->position) {
|
120
114
|
return pp1->offset < pp2->offset;
|
121
115
|
} else {
|
@@ -123,16 +117,14 @@ static bool pp_less_than(const PhPos *pp1, const PhPos *pp2)
|
|
123
117
|
}
|
124
118
|
}
|
125
119
|
|
126
|
-
static void pp_destroy(PhPos *pp)
|
127
|
-
{
|
120
|
+
static void pp_destroy(PhPos *pp) {
|
128
121
|
if (pp->tpe) {
|
129
122
|
pp->tpe->close(pp->tpe);
|
130
123
|
}
|
131
124
|
free(pp);
|
132
125
|
}
|
133
126
|
|
134
|
-
static PhPos *pp_new(FrtTermDocEnum *tpe, int offset)
|
135
|
-
{
|
127
|
+
static PhPos *pp_new(FrtTermDocEnum *tpe, int offset) {
|
136
128
|
PhPos *self = FRT_ALLOC(PhPos);
|
137
129
|
|
138
130
|
self->tpe = tpe;
|
@@ -148,8 +140,7 @@ static PhPos *pp_new(FrtTermDocEnum *tpe, int offset)
|
|
148
140
|
|
149
141
|
#define PhSc(scorer) ((PhraseScorer *)(scorer))
|
150
142
|
|
151
|
-
typedef struct PhraseScorer
|
152
|
-
{
|
143
|
+
typedef struct PhraseScorer {
|
153
144
|
FrtScorer super;
|
154
145
|
float (*phrase_freq)(FrtScorer *self);
|
155
146
|
float freq;
|
@@ -165,8 +156,7 @@ typedef struct PhraseScorer
|
|
165
156
|
bool check_repeats : 1;
|
166
157
|
} PhraseScorer;
|
167
158
|
|
168
|
-
static void phsc_init(PhraseScorer *phsc)
|
169
|
-
{
|
159
|
+
static void phsc_init(PhraseScorer *phsc) {
|
170
160
|
int i;
|
171
161
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
172
162
|
if (!(phsc->more = pp_next(phsc->phrase_pos[i]))) break;
|
@@ -179,8 +169,7 @@ static void phsc_init(PhraseScorer *phsc)
|
|
179
169
|
}
|
180
170
|
}
|
181
171
|
|
182
|
-
static bool phsc_do_next(FrtScorer *self)
|
183
|
-
{
|
172
|
+
static bool phsc_do_next(FrtScorer *self) {
|
184
173
|
PhraseScorer *phsc = PhSc(self);
|
185
174
|
const int pp_cnt = phsc->pp_cnt;
|
186
175
|
int pp_first_idx = phsc->pp_first_idx;
|
@@ -220,8 +209,7 @@ static bool phsc_do_next(FrtScorer *self)
|
|
220
209
|
return false;
|
221
210
|
}
|
222
211
|
|
223
|
-
static float phsc_score(FrtScorer *self)
|
224
|
-
{
|
212
|
+
static float phsc_score(FrtScorer *self) {
|
225
213
|
PhraseScorer *phsc = PhSc(self);
|
226
214
|
float raw_score = frt_sim_tf(self->similarity, phsc->freq) * phsc->value;
|
227
215
|
/* normalize */
|
@@ -230,8 +218,7 @@ static float phsc_score(FrtScorer *self)
|
|
230
218
|
phsc->norms[self->doc]);
|
231
219
|
}
|
232
220
|
|
233
|
-
static bool phsc_next(FrtScorer *self)
|
234
|
-
{
|
221
|
+
static bool phsc_next(FrtScorer *self) {
|
235
222
|
PhraseScorer *phsc = PhSc(self);
|
236
223
|
if (phsc->first_time) {
|
237
224
|
phsc_init(phsc);
|
@@ -243,8 +230,7 @@ static bool phsc_next(FrtScorer *self)
|
|
243
230
|
return phsc_do_next(self);
|
244
231
|
}
|
245
232
|
|
246
|
-
static bool phsc_skip_to(FrtScorer *self, int doc_num)
|
247
|
-
{
|
233
|
+
static bool phsc_skip_to(FrtScorer *self, int doc_num) {
|
248
234
|
PhraseScorer *phsc = PhSc(self);
|
249
235
|
int i;
|
250
236
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
@@ -261,8 +247,7 @@ static bool phsc_skip_to(FrtScorer *self, int doc_num)
|
|
261
247
|
return phsc_do_next(self);
|
262
248
|
}
|
263
249
|
|
264
|
-
static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num)
|
265
|
-
{
|
250
|
+
static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num) {
|
266
251
|
PhraseScorer *phsc = PhSc(self);
|
267
252
|
float phrase_freq;
|
268
253
|
|
@@ -273,8 +258,7 @@ static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num)
|
|
273
258
|
"tf(phrase_freq=%f)", phrase_freq);
|
274
259
|
}
|
275
260
|
|
276
|
-
static void phsc_destroy(FrtScorer *self)
|
277
|
-
{
|
261
|
+
static void phsc_destroy(FrtScorer *self) {
|
278
262
|
PhraseScorer *phsc = PhSc(self);
|
279
263
|
int i;
|
280
264
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
@@ -289,8 +273,7 @@ static FrtScorer *phsc_new(FrtWeight *weight,
|
|
289
273
|
FrtPhrasePosition *positions, int pos_cnt,
|
290
274
|
FrtSimilarity *similarity,
|
291
275
|
frt_uchar *norms,
|
292
|
-
int slop)
|
293
|
-
{
|
276
|
+
int slop) {
|
294
277
|
int i;
|
295
278
|
FrtScorer *self = frt_scorer_new(PhraseScorer, similarity);
|
296
279
|
FrtHashSet *term_set = NULL;
|
@@ -342,8 +325,7 @@ static FrtScorer *phsc_new(FrtWeight *weight,
|
|
342
325
|
* ExactPhraseScorer
|
343
326
|
***************************************************************************/
|
344
327
|
|
345
|
-
static float ephsc_phrase_freq(FrtScorer *self)
|
346
|
-
{
|
328
|
+
static float ephsc_phrase_freq(FrtScorer *self) {
|
347
329
|
PhraseScorer *phsc = PhSc(self);
|
348
330
|
int i;
|
349
331
|
int pp_first_idx = 0;
|
@@ -387,8 +369,7 @@ static float ephsc_phrase_freq(FrtScorer *self)
|
|
387
369
|
static FrtScorer *exact_phrase_scorer_new(FrtWeight *weight,
|
388
370
|
FrtTermDocEnum **term_pos_enum,
|
389
371
|
FrtPhrasePosition *positions, int pp_cnt,
|
390
|
-
FrtSimilarity *similarity, frt_uchar *norms)
|
391
|
-
{
|
372
|
+
FrtSimilarity *similarity, frt_uchar *norms) {
|
392
373
|
FrtScorer *self = phsc_new(weight,
|
393
374
|
term_pos_enum,
|
394
375
|
positions,
|
@@ -407,8 +388,7 @@ static FrtScorer *exact_phrase_scorer_new(FrtWeight *weight,
|
|
407
388
|
|
408
389
|
static bool sphsc_check_repeats(PhPos *pp,
|
409
390
|
PhPos **positions,
|
410
|
-
const int p_cnt)
|
411
|
-
{
|
391
|
+
const int p_cnt) {
|
412
392
|
int j;
|
413
393
|
for (j = 0; j < p_cnt; j++) {
|
414
394
|
PhPos *ppj = positions[j];
|
@@ -432,8 +412,7 @@ static bool sphsc_check_repeats(PhPos *pp,
|
|
432
412
|
return true;
|
433
413
|
}
|
434
414
|
|
435
|
-
static float sphsc_phrase_freq(FrtScorer *self)
|
436
|
-
{
|
415
|
+
static float sphsc_phrase_freq(FrtScorer *self) {
|
437
416
|
PhraseScorer *phsc = PhSc(self);
|
438
417
|
PhPos *pp;
|
439
418
|
FrtPriorityQueue *pq = frt_pq_new(phsc->pp_cnt, (frt_lt_ft)&pp_less_than, NULL);
|
@@ -494,8 +473,7 @@ static FrtScorer *sloppy_phrase_scorer_new(FrtWeight *weight,
|
|
494
473
|
FrtTermDocEnum **term_pos_enum,
|
495
474
|
FrtPhrasePosition *positions,
|
496
475
|
int pp_cnt, FrtSimilarity *similarity,
|
497
|
-
int slop, frt_uchar *norms)
|
498
|
-
{
|
476
|
+
int slop, frt_uchar *norms) {
|
499
477
|
FrtScorer *self = phsc_new(weight,
|
500
478
|
term_pos_enum,
|
501
479
|
positions,
|
@@ -514,13 +492,11 @@ static FrtScorer *sloppy_phrase_scorer_new(FrtWeight *weight,
|
|
514
492
|
*
|
515
493
|
***************************************************************************/
|
516
494
|
|
517
|
-
static char *phw_to_s(FrtWeight *self)
|
518
|
-
{
|
495
|
+
static char *phw_to_s(FrtWeight *self) {
|
519
496
|
return frt_strfmt("PhraseWeight(%f)", self->value);
|
520
497
|
}
|
521
498
|
|
522
|
-
static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
523
|
-
{
|
499
|
+
static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir) {
|
524
500
|
int i;
|
525
501
|
FrtScorer *phsc = NULL;
|
526
502
|
FrtPhraseQuery *phq = PhQ(self->query);
|
@@ -561,8 +537,7 @@ static FrtScorer *phw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
|
561
537
|
return phsc;
|
562
538
|
}
|
563
539
|
|
564
|
-
static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num)
|
565
|
-
{
|
540
|
+
static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num) {
|
566
541
|
FrtExplanation *expl;
|
567
542
|
FrtExplanation *idf_expl1;
|
568
543
|
FrtExplanation *idf_expl2;
|
@@ -659,16 +634,14 @@ static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_
|
|
659
634
|
if (query_expl->value == 1.0) {
|
660
635
|
frt_expl_destroy(expl);
|
661
636
|
return field_expl;
|
662
|
-
}
|
663
|
-
else {
|
637
|
+
} else {
|
664
638
|
expl->value = (query_expl->value * field_expl->value);
|
665
639
|
frt_expl_add_detail(expl, field_expl);
|
666
640
|
return expl;
|
667
641
|
}
|
668
642
|
}
|
669
643
|
|
670
|
-
static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher)
|
671
|
-
{
|
644
|
+
static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher) {
|
672
645
|
FrtWeight *self = w_new(FrtWeight, query);
|
673
646
|
|
674
647
|
self->scorer = &phw_scorer;
|
@@ -690,8 +663,7 @@ static FrtWeight *phw_new(FrtQuery *query, FrtSearcher *searcher)
|
|
690
663
|
***************************************************************************/
|
691
664
|
|
692
665
|
/* ** TVPosEnum ** */
|
693
|
-
typedef struct TVPosEnum
|
694
|
-
{
|
666
|
+
typedef struct TVPosEnum {
|
695
667
|
int index;
|
696
668
|
int size;
|
697
669
|
int offset;
|
@@ -699,20 +671,17 @@ typedef struct TVPosEnum
|
|
699
671
|
int positions[1];
|
700
672
|
} TVPosEnum;
|
701
673
|
|
702
|
-
static bool tvpe_next(TVPosEnum *self)
|
703
|
-
{
|
674
|
+
static bool tvpe_next(TVPosEnum *self) {
|
704
675
|
if (++(self->index) < self->size) {
|
705
676
|
self->pos = self->positions[self->index] - self->offset;
|
706
677
|
return true;
|
707
|
-
}
|
708
|
-
else {
|
678
|
+
} else {
|
709
679
|
self->pos = -1;
|
710
680
|
return false;
|
711
681
|
}
|
712
682
|
}
|
713
683
|
|
714
|
-
static int tvpe_skip_to(TVPosEnum *self, int position)
|
715
|
-
{
|
684
|
+
static int tvpe_skip_to(TVPosEnum *self, int position) {
|
716
685
|
int i;
|
717
686
|
int search_pos = position + self->offset;
|
718
687
|
for (i = self->index + 1; i < self->size; i++) {
|
@@ -729,13 +698,11 @@ static int tvpe_skip_to(TVPosEnum *self, int position)
|
|
729
698
|
return true;
|
730
699
|
}
|
731
700
|
|
732
|
-
static bool tvpe_lt(TVPosEnum *tvpe1, TVPosEnum *tvpe2)
|
733
|
-
{
|
701
|
+
static bool tvpe_lt(TVPosEnum *tvpe1, TVPosEnum *tvpe2) {
|
734
702
|
return tvpe1->pos < tvpe2->pos;
|
735
703
|
}
|
736
704
|
|
737
|
-
static TVPosEnum *tvpe_new(int *positions, int size, int offset)
|
738
|
-
{
|
705
|
+
static TVPosEnum *tvpe_new(int *positions, int size, int offset) {
|
739
706
|
TVPosEnum *self = (TVPosEnum*)frt_emalloc(sizeof(TVPosEnum) + size*sizeof(int));
|
740
707
|
memcpy(self->positions, positions, size * sizeof(int));
|
741
708
|
self->size = size;
|
@@ -745,8 +712,7 @@ static TVPosEnum *tvpe_new(int *positions, int size, int offset)
|
|
745
712
|
return self;
|
746
713
|
}
|
747
714
|
|
748
|
-
static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int offset)
|
749
|
-
{
|
715
|
+
static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int offset) {
|
750
716
|
int i, total_positions = 0;
|
751
717
|
FrtPriorityQueue *tvpe_pq = frt_pq_new(t_cnt, (frt_lt_ft)tvpe_lt, &free);
|
752
718
|
TVPosEnum *self = NULL;
|
@@ -787,8 +753,7 @@ static TVPosEnum *tvpe_new_merge(char **terms, int t_cnt, FrtTermVector *tv, int
|
|
787
753
|
return self;
|
788
754
|
}
|
789
755
|
|
790
|
-
static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offset)
|
791
|
-
{
|
756
|
+
static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offset) {
|
792
757
|
TVPosEnum *tvpe = NULL;
|
793
758
|
if (t_cnt == 1) {
|
794
759
|
FrtTVTerm *tv_term = frt_tv_get_tv_term(tv, terms[0]);
|
@@ -801,8 +766,7 @@ static TVPosEnum *get_tvpe(FrtTermVector *tv, char **terms, int t_cnt, int offse
|
|
801
766
|
return tvpe;
|
802
767
|
}
|
803
768
|
|
804
|
-
static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv)
|
805
|
-
{
|
769
|
+
static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
806
770
|
if (tv->field == PhQ(self)->field) {
|
807
771
|
const int pos_cnt = PhQ(self)->pos_cnt;
|
808
772
|
int i;
|
@@ -872,8 +836,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
872
836
|
if (tvpe && ((i == 0 && tvpe_next(tvpe))
|
873
837
|
|| tvpe_skip_to(tvpe, tvpe_a[i-1]->pos))) {
|
874
838
|
tvpe_a[i] = tvpe;
|
875
|
-
}
|
876
|
-
else {
|
839
|
+
} else {
|
877
840
|
done = true;
|
878
841
|
free(tvpe);
|
879
842
|
break;
|
@@ -889,8 +852,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
889
852
|
last = first;
|
890
853
|
first_index = FRT_NEXT_NUM(first_index, pos_cnt);
|
891
854
|
first = tvpe_a[first_index];
|
892
|
-
}
|
893
|
-
else {
|
855
|
+
} else {
|
894
856
|
done = true;
|
895
857
|
break;
|
896
858
|
}
|
@@ -917,8 +879,7 @@ static FrtMatchVector *phq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtT
|
|
917
879
|
|
918
880
|
#define PhQ_INIT_CAPA 4
|
919
881
|
|
920
|
-
static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set)
|
921
|
-
{
|
882
|
+
static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set) {
|
922
883
|
FrtPhraseQuery *phq = PhQ(self);
|
923
884
|
int i, j;
|
924
885
|
for (i = 0; i < phq->pos_cnt; i++) {
|
@@ -948,8 +909,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
948
909
|
if (phq->pos_cnt == 0) {
|
949
910
|
if (default_field != phq->field) {
|
950
911
|
return frt_strfmt("%s:\"\"", field_name);
|
951
|
-
}
|
952
|
-
else {
|
912
|
+
} else {
|
953
913
|
return frt_estrdup("\"\"");
|
954
914
|
}
|
955
915
|
}
|
@@ -988,8 +948,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
988
948
|
pos = positions[i].pos;
|
989
949
|
if (pos == last_pos) {
|
990
950
|
buffer[buf_index - 1] = '&';
|
991
|
-
}
|
992
|
-
else {
|
951
|
+
} else {
|
993
952
|
for (j = last_pos; j < pos - 1; j++) {
|
994
953
|
memcpy(buffer + buf_index, "<> ", 3);
|
995
954
|
buf_index += 3;
|
@@ -1026,8 +985,7 @@ static char *phq_to_s(FrtQuery *self, ID default_field) {
|
|
1026
985
|
return buffer;
|
1027
986
|
}
|
1028
987
|
|
1029
|
-
static void phq_destroy(FrtQuery *self)
|
1030
|
-
{
|
988
|
+
static void phq_destroy(FrtQuery *self) {
|
1031
989
|
FrtPhraseQuery *phq = PhQ(self);
|
1032
990
|
int i;
|
1033
991
|
for (i = 0; i < phq->pos_cnt; i++) {
|
@@ -1037,8 +995,7 @@ static void phq_destroy(FrtQuery *self)
|
|
1037
995
|
frt_q_destroy_i(self);
|
1038
996
|
}
|
1039
997
|
|
1040
|
-
static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
1041
|
-
{
|
998
|
+
static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
1042
999
|
FrtPhraseQuery *phq = PhQ(self);
|
1043
1000
|
(void)ir;
|
1044
1001
|
if (phq->pos_cnt == 1) {
|
@@ -1049,8 +1006,7 @@ static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
1049
1006
|
FrtQuery *tq = frt_tq_new(phq->field, terms[0]);
|
1050
1007
|
tq->boost = self->boost;
|
1051
1008
|
return tq;
|
1052
|
-
}
|
1053
|
-
else {
|
1009
|
+
} else {
|
1054
1010
|
FrtQuery *q = frt_multi_tq_new(phq->field);
|
1055
1011
|
int i;
|
1056
1012
|
for (i = 0; i < t_cnt; i++) {
|
@@ -1065,8 +1021,7 @@ static FrtQuery *phq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
1065
1021
|
}
|
1066
1022
|
}
|
1067
1023
|
|
1068
|
-
static unsigned long long phq_hash(FrtQuery *self)
|
1069
|
-
{
|
1024
|
+
static unsigned long long phq_hash(FrtQuery *self) {
|
1070
1025
|
int i, j;
|
1071
1026
|
FrtPhraseQuery *phq = PhQ(self);
|
1072
1027
|
unsigned long long hash = frt_str_hash(rb_id2name(phq->field));
|
@@ -1080,8 +1035,7 @@ static unsigned long long phq_hash(FrtQuery *self)
|
|
1080
1035
|
return (hash ^ phq->slop);
|
1081
1036
|
}
|
1082
1037
|
|
1083
|
-
static int phq_eq(FrtQuery *self, FrtQuery *o)
|
1084
|
-
{
|
1038
|
+
static int phq_eq(FrtQuery *self, FrtQuery *o) {
|
1085
1039
|
int i, j;
|
1086
1040
|
FrtPhraseQuery *phq1 = PhQ(self);
|
1087
1041
|
FrtPhraseQuery *phq2 = PhQ(o);
|
@@ -1134,8 +1088,7 @@ FrtQuery *frt_phq_new(ID field) {
|
|
1134
1088
|
return frt_phq_init(self, field);
|
1135
1089
|
}
|
1136
1090
|
|
1137
|
-
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
1138
|
-
{
|
1091
|
+
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position) {
|
1139
1092
|
FrtPhraseQuery *phq = PhQ(self);
|
1140
1093
|
int index = phq->pos_cnt;
|
1141
1094
|
FrtPhrasePosition *pp;
|
@@ -1150,33 +1103,28 @@ void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
|
1150
1103
|
phq->pos_cnt++;
|
1151
1104
|
}
|
1152
1105
|
|
1153
|
-
void frt_phq_add_term(FrtQuery *self, const char *term, int pos_inc)
|
1154
|
-
{
|
1106
|
+
void frt_phq_add_term(FrtQuery *self, const char *term, int pos_inc) {
|
1155
1107
|
FrtPhraseQuery *phq = PhQ(self);
|
1156
1108
|
int position;
|
1157
1109
|
if (phq->pos_cnt == 0) {
|
1158
1110
|
position = 0;
|
1159
|
-
}
|
1160
|
-
else {
|
1111
|
+
} else {
|
1161
1112
|
position = phq->positions[phq->pos_cnt - 1].pos + pos_inc;
|
1162
1113
|
}
|
1163
1114
|
frt_phq_add_term_abs(self, term, position);
|
1164
1115
|
}
|
1165
1116
|
|
1166
|
-
void frt_phq_append_multi_term(FrtQuery *self, const char *term)
|
1167
|
-
{
|
1117
|
+
void frt_phq_append_multi_term(FrtQuery *self, const char *term) {
|
1168
1118
|
FrtPhraseQuery *phq = PhQ(self);
|
1169
1119
|
int index = phq->pos_cnt - 1;
|
1170
1120
|
|
1171
1121
|
if (index < 0) {
|
1172
1122
|
frt_phq_add_term(self, term, 0);
|
1173
|
-
}
|
1174
|
-
else {
|
1123
|
+
} else {
|
1175
1124
|
frt_ary_push(phq->positions[index].terms, frt_estrdup(term));
|
1176
1125
|
}
|
1177
1126
|
}
|
1178
1127
|
|
1179
|
-
void frt_phq_set_slop(FrtQuery *self, int slop)
|
1180
|
-
{
|
1128
|
+
void frt_phq_set_slop(FrtQuery *self, int slop) {
|
1181
1129
|
PhQ(self)->slop = slop;
|
1182
1130
|
}
|
@@ -60,23 +60,20 @@ static char *range_to_s(FrtRange *range, ID default_field, float boost) {
|
|
60
60
|
return buffer;
|
61
61
|
}
|
62
62
|
|
63
|
-
static void range_destroy(FrtRange *range)
|
64
|
-
{
|
63
|
+
static void range_destroy(FrtRange *range) {
|
65
64
|
free(range->lower_term);
|
66
65
|
free(range->upper_term);
|
67
66
|
free(range);
|
68
67
|
}
|
69
68
|
|
70
|
-
static unsigned long long range_hash(FrtRange *filt)
|
71
|
-
{
|
69
|
+
static unsigned long long range_hash(FrtRange *filt) {
|
72
70
|
return filt->include_lower | (filt->include_upper << 1)
|
73
71
|
| ((frt_str_hash(rb_id2name(filt->field))
|
74
72
|
^ (filt->lower_term ? frt_str_hash(filt->lower_term) : 0)
|
75
73
|
^ (filt->upper_term ? frt_str_hash(filt->upper_term) : 0)) << 2);
|
76
74
|
}
|
77
75
|
|
78
|
-
static int range_eq(FrtRange *filt, FrtRange *o)
|
79
|
-
{
|
76
|
+
static int range_eq(FrtRange *filt, FrtRange *o) {
|
80
77
|
if ((filt->lower_term && !o->lower_term) || (!filt->lower_term && o->lower_term)) { return false; }
|
81
78
|
if ((filt->upper_term && !o->upper_term) || (!filt->upper_term && o->upper_term)) { return false; }
|
82
79
|
return ((filt->field == o->field)
|
@@ -149,8 +146,7 @@ static FrtRange *trange_new(ID field, const char *lower_term, const char *upper_
|
|
149
146
|
FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
|
150
147
|
" numbers \"%lg\" < \"%lg\"", upper_num, lower_num);
|
151
148
|
}
|
152
|
-
}
|
153
|
-
else {
|
149
|
+
} else {
|
154
150
|
if (upper_term && lower_term &&
|
155
151
|
(strcmp(upper_term, lower_term) < 0)) {
|
156
152
|
FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
|
@@ -182,16 +178,14 @@ static void frt_rfilt_destroy_i(FrtFilter *filt) {
|
|
182
178
|
frt_filt_destroy_i(filt);
|
183
179
|
}
|
184
180
|
|
185
|
-
static char *frt_rfilt_to_s(FrtFilter *filt)
|
186
|
-
{
|
181
|
+
static char *frt_rfilt_to_s(FrtFilter *filt) {
|
187
182
|
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
188
183
|
char *rfstr = frt_strfmt("RangeFilter< %s >", rstr);
|
189
184
|
free(rstr);
|
190
185
|
return rfstr;
|
191
186
|
}
|
192
187
|
|
193
|
-
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
194
|
-
{
|
188
|
+
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
195
189
|
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
|
196
190
|
FrtRange *range = RF(filt)->range;
|
197
191
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|