isomorfeus-ferret 0.17.3 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
- data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
- data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
- data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
- data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/index/index.rb +7 -7
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +12 -6
@@ -25,8 +25,8 @@ static bool fqsc_next(FrtScorer *self) {
|
|
25
25
|
FrtScorer *sub_sc = FQSc(self)->sub_scorer;
|
26
26
|
FrtBitVector *bv = FQSc(self)->bv;
|
27
27
|
while (sub_sc->next(sub_sc)) {
|
28
|
-
self->
|
29
|
-
if (frt_bv_get(bv, self->
|
28
|
+
self->doc_num = sub_sc->doc_num;
|
29
|
+
if (frt_bv_get(bv, self->doc_num)) return true;
|
30
30
|
}
|
31
31
|
return false;
|
32
32
|
}
|
@@ -36,8 +36,8 @@ static bool fqsc_skip_to(FrtScorer *self, int doc_num) {
|
|
36
36
|
FrtBitVector *bv = FQSc(self)->bv;
|
37
37
|
if (sub_sc->skip_to(sub_sc, doc_num)) {
|
38
38
|
do {
|
39
|
-
self->
|
40
|
-
if (frt_bv_get(bv, self->
|
39
|
+
self->doc_num = sub_sc->doc_num;
|
40
|
+
if (frt_bv_get(bv, self->doc_num)) {
|
41
41
|
return true;
|
42
42
|
}
|
43
43
|
} while (sub_sc->next(sub_sc));
|
@@ -10,10 +10,10 @@
|
|
10
10
|
#define MASc(scorer) ((MatchAllScorer *)(scorer))
|
11
11
|
|
12
12
|
typedef struct MatchAllScorer {
|
13
|
-
FrtScorer
|
14
|
-
FrtIndexReader
|
15
|
-
int
|
16
|
-
float
|
13
|
+
FrtScorer super;
|
14
|
+
FrtIndexReader *ir;
|
15
|
+
int max_doc_num;
|
16
|
+
float score;
|
17
17
|
} MatchAllScorer;
|
18
18
|
|
19
19
|
static float masc_score(FrtScorer *self) {
|
@@ -21,9 +21,9 @@ static float masc_score(FrtScorer *self) {
|
|
21
21
|
}
|
22
22
|
|
23
23
|
static bool masc_next(FrtScorer *self) {
|
24
|
-
while (self->
|
25
|
-
self->
|
26
|
-
if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->
|
24
|
+
while (self->doc_num < (MASc(self)->max_doc_num - 1)) {
|
25
|
+
self->doc_num++;
|
26
|
+
if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->doc_num)) {
|
27
27
|
return true;
|
28
28
|
}
|
29
29
|
}
|
@@ -31,7 +31,7 @@ static bool masc_next(FrtScorer *self) {
|
|
31
31
|
}
|
32
32
|
|
33
33
|
static bool masc_skip_to(FrtScorer *self, int doc_num) {
|
34
|
-
self->
|
34
|
+
self->doc_num = doc_num - 1;
|
35
35
|
return masc_next(self);
|
36
36
|
}
|
37
37
|
|
@@ -50,9 +50,9 @@ static FrtScorer *masc_new(FrtWeight *weight, FrtIndexReader *ir) {
|
|
50
50
|
FrtScorer *self = frt_scorer_new(MatchAllScorer, weight->similarity);
|
51
51
|
MASc(self)->ir = ir;
|
52
52
|
FRT_REF(ir);
|
53
|
-
MASc(self)->
|
53
|
+
MASc(self)->max_doc_num = ir->max_doc_num(ir);
|
54
54
|
MASc(self)->score = weight->value;
|
55
|
-
self->
|
55
|
+
self->doc_num = -1;
|
56
56
|
self->score = &masc_score;
|
57
57
|
self->next = &masc_next;
|
58
58
|
self->skip_to = &masc_skip_to;
|
@@ -54,33 +54,33 @@ static BoostedTerm *boosted_term_new(const char *term, float boost) {
|
|
54
54
|
#define TDE_READ_SIZE 16
|
55
55
|
|
56
56
|
typedef struct TermDocEnumWrapper {
|
57
|
-
const char
|
57
|
+
const char *term;
|
58
58
|
FrtTermDocEnum *tde;
|
59
|
-
float
|
60
|
-
int
|
61
|
-
int
|
62
|
-
int
|
63
|
-
int
|
64
|
-
int
|
65
|
-
int
|
59
|
+
float boost;
|
60
|
+
int doc_num;
|
61
|
+
int freq;
|
62
|
+
int doc_nums[TDE_READ_SIZE];
|
63
|
+
int freqs[TDE_READ_SIZE];
|
64
|
+
int pointer;
|
65
|
+
int pointer_max;
|
66
66
|
} TermDocEnumWrapper;
|
67
67
|
|
68
68
|
static bool tdew_less_than(const TermDocEnumWrapper *tdew1, const TermDocEnumWrapper *tdew2) {
|
69
|
-
return (tdew1->
|
69
|
+
return (tdew1->doc_num < tdew2->doc_num);
|
70
70
|
}
|
71
71
|
|
72
72
|
static bool tdew_next(TermDocEnumWrapper *self) {
|
73
73
|
self->pointer++;
|
74
74
|
if (self->pointer >= self->pointer_max) {
|
75
75
|
/* refill buffer */
|
76
|
-
self->pointer_max = self->tde->read(self->tde, self->
|
76
|
+
self->pointer_max = self->tde->read(self->tde, self->doc_nums, self->freqs, TDE_READ_SIZE);
|
77
77
|
if (self->pointer_max != 0) {
|
78
78
|
self->pointer = 0;
|
79
79
|
} else {
|
80
80
|
return false;
|
81
81
|
}
|
82
82
|
}
|
83
|
-
self->
|
83
|
+
self->doc_num = self->doc_nums[self->pointer];
|
84
84
|
self->freq = self->freqs[self->pointer];
|
85
85
|
return true;
|
86
86
|
}
|
@@ -89,8 +89,8 @@ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num) {
|
|
89
89
|
FrtTermDocEnum *tde = self->tde;
|
90
90
|
|
91
91
|
while (++(self->pointer) < self->pointer_max) {
|
92
|
-
if (self->
|
93
|
-
self->
|
92
|
+
if (self->doc_nums[self->pointer] >= doc_num) {
|
93
|
+
self->doc_num = self->doc_nums[self->pointer];
|
94
94
|
self->freq = self->freqs[self->pointer];
|
95
95
|
return true;
|
96
96
|
}
|
@@ -100,7 +100,7 @@ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num) {
|
|
100
100
|
if (tde->skip_to(tde, doc_num)) {
|
101
101
|
self->pointer_max = 1;
|
102
102
|
self->pointer = 0;
|
103
|
-
self->
|
103
|
+
self->doc_nums[0] = self->doc_num = tde->doc_num(tde);
|
104
104
|
self->freqs[0] = self->freq = tde->freq(tde);
|
105
105
|
return true;
|
106
106
|
} else {
|
@@ -118,7 +118,7 @@ static TermDocEnumWrapper *tdew_new(const char *term, FrtTermDocEnum *tde, float
|
|
118
118
|
self->term = term;
|
119
119
|
self->tde = tde;
|
120
120
|
self->boost = boost;
|
121
|
-
self->
|
121
|
+
self->doc_num = -1;
|
122
122
|
return self;
|
123
123
|
}
|
124
124
|
|
@@ -144,11 +144,11 @@ typedef struct MultiTermScorer {
|
|
144
144
|
|
145
145
|
static float multi_tsc_score(FrtScorer *self) {
|
146
146
|
return MTSc(self)->total_score * MTSc(self)->weight_value
|
147
|
-
* frt_sim_decode_norm(self->similarity, MTSc(self)->norms[self->
|
147
|
+
* frt_sim_decode_norm(self->similarity, MTSc(self)->norms[self->doc_num]);
|
148
148
|
}
|
149
149
|
|
150
150
|
static bool multi_tsc_next(FrtScorer *self) {
|
151
|
-
int
|
151
|
+
int curr_doc_num;
|
152
152
|
float total_score = 0.0f;
|
153
153
|
TermDocEnumWrapper *tdew;
|
154
154
|
MultiTermScorer *mtsc = MTSc(self);
|
@@ -170,7 +170,7 @@ static bool multi_tsc_next(FrtScorer *self) {
|
|
170
170
|
return false;
|
171
171
|
}
|
172
172
|
|
173
|
-
self->
|
173
|
+
self->doc_num = curr_doc_num = tdew->doc_num;
|
174
174
|
do {
|
175
175
|
int freq = tdew->freq;
|
176
176
|
if (freq < SCORE_CACHE_SIZE) {
|
@@ -186,7 +186,7 @@ static bool multi_tsc_next(FrtScorer *self) {
|
|
186
186
|
}
|
187
187
|
|
188
188
|
} while (((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL)
|
189
|
-
&& tdew->
|
189
|
+
&& tdew->doc_num == curr_doc_num);
|
190
190
|
mtsc->total_score = total_score;
|
191
191
|
return true;
|
192
192
|
}
|
@@ -207,11 +207,11 @@ static bool multi_tsc_advance_to(FrtScorer *self, int target_doc_num) {
|
|
207
207
|
MTSc(self)->tdew_pq = tdew_pq;
|
208
208
|
}
|
209
209
|
if (tdew_pq->size == 0) {
|
210
|
-
self->
|
210
|
+
self->doc_num = -1;
|
211
211
|
return false;
|
212
212
|
}
|
213
213
|
while ((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL
|
214
|
-
&& (target_doc_num > tdew->
|
214
|
+
&& (target_doc_num > tdew->doc_num)) {
|
215
215
|
if (tdew_skip_to(tdew, target_doc_num)) {
|
216
216
|
frt_pq_down(tdew_pq);
|
217
217
|
} else {
|
@@ -231,11 +231,11 @@ static FrtExplanation *multi_tsc_explain(FrtScorer *self, int doc_num) {
|
|
231
231
|
TermDocEnumWrapper *tdew;
|
232
232
|
|
233
233
|
if (multi_tsc_advance_to(self, doc_num) &&
|
234
|
-
(tdew = (TermDocEnumWrapper *)frt_pq_top(mtsc->tdew_pq))->
|
234
|
+
(tdew = (TermDocEnumWrapper *)frt_pq_top(mtsc->tdew_pq))->doc_num == doc_num) {
|
235
235
|
|
236
236
|
FrtPriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
|
237
237
|
FrtExplanation *expl = frt_expl_new(0.0f, "The sum of:");
|
238
|
-
int
|
238
|
+
int curr_doc_num = self->doc_num = tdew->doc_num;
|
239
239
|
float total_score = 0.0f;
|
240
240
|
|
241
241
|
do {
|
@@ -256,7 +256,7 @@ static FrtExplanation *multi_tsc_explain(FrtScorer *self, int doc_num) {
|
|
256
256
|
}
|
257
257
|
|
258
258
|
} while (((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL)
|
259
|
-
&& tdew->
|
259
|
+
&& tdew->doc_num == curr_doc_num);
|
260
260
|
expl->value = total_score;
|
261
261
|
return expl;
|
262
262
|
} else {
|
@@ -444,7 +444,7 @@ static FrtExplanation *multi_tw_explain(FrtWeight *self, FrtIndexReader *ir, int
|
|
444
444
|
static FrtWeight *multi_tw_new(FrtQuery *query, FrtSearcher *searcher) {
|
445
445
|
int i;
|
446
446
|
int doc_freq = 0;
|
447
|
-
FrtWeight *self
|
447
|
+
FrtWeight *self = w_new(FrtWeight, query);
|
448
448
|
FrtPriorityQueue *bt_pq = MTQ(query)->boosted_terms;
|
449
449
|
|
450
450
|
self->scorer = &multi_tw_scorer;
|
@@ -460,7 +460,7 @@ static FrtWeight *multi_tw_new(FrtQuery *query, FrtSearcher *searcher) {
|
|
460
460
|
((BoostedTerm *)bt_pq->heap[i])->term);
|
461
461
|
}
|
462
462
|
self->idf += frt_sim_idf(self->similarity, doc_freq,
|
463
|
-
searcher->
|
463
|
+
searcher->max_doc_num(searcher));
|
464
464
|
|
465
465
|
return self;
|
466
466
|
}
|
@@ -42,7 +42,7 @@ typedef struct PhPos {
|
|
42
42
|
FrtTermDocEnum *tpe;
|
43
43
|
int offset;
|
44
44
|
int count;
|
45
|
-
int
|
45
|
+
int doc_num;
|
46
46
|
int position;
|
47
47
|
} PhPos;
|
48
48
|
|
@@ -53,10 +53,10 @@ static bool pp_next(PhPos *self) {
|
|
53
53
|
if (!tpe->next(tpe)) {
|
54
54
|
tpe->close(tpe); /* close stream */
|
55
55
|
self->tpe = NULL;
|
56
|
-
self->
|
56
|
+
self->doc_num = INT_MAX; /* sentinel value */
|
57
57
|
return false;
|
58
58
|
}
|
59
|
-
self->
|
59
|
+
self->doc_num = tpe->doc_num(tpe);
|
60
60
|
self->position = 0;
|
61
61
|
return true;
|
62
62
|
}
|
@@ -68,10 +68,10 @@ static bool pp_skip_to(PhPos *self, int doc_num) {
|
|
68
68
|
if (!tpe->skip_to(tpe, doc_num)) {
|
69
69
|
tpe->close(tpe); /* close stream */
|
70
70
|
self->tpe = NULL;
|
71
|
-
self->
|
71
|
+
self->doc_num = INT_MAX; /* sentinel value */
|
72
72
|
return false;
|
73
73
|
}
|
74
|
-
self->
|
74
|
+
self->doc_num = tpe->doc_num(tpe);
|
75
75
|
self->position = 0;
|
76
76
|
return true;
|
77
77
|
}
|
@@ -95,7 +95,7 @@ static bool pp_first_position(PhPos *self) {
|
|
95
95
|
|
96
96
|
#define PP_pp(p) (*(PhPos **)p)
|
97
97
|
static int pp_cmp(const void *const p1, const void *const p2) {
|
98
|
-
int cmp = PP_pp(p1)->
|
98
|
+
int cmp = PP_pp(p1)->doc_num - PP_pp(p2)->doc_num;
|
99
99
|
if (cmp == 0) {
|
100
100
|
cmp = PP_pp(p1)->position - PP_pp(p2)->position;
|
101
101
|
if (cmp == 0) {
|
@@ -128,7 +128,7 @@ static PhPos *pp_new(FrtTermDocEnum *tpe, int offset) {
|
|
128
128
|
PhPos *self = FRT_ALLOC(PhPos);
|
129
129
|
|
130
130
|
self->tpe = tpe;
|
131
|
-
self->count = self->
|
131
|
+
self->count = self->doc_num = self->position = -1;
|
132
132
|
self->offset = offset;
|
133
133
|
|
134
134
|
return self;
|
@@ -179,9 +179,9 @@ static bool phsc_do_next(FrtScorer *self) {
|
|
179
179
|
PhPos *last = phrase_positions[FRT_PREV_NUM(pp_first_idx, pp_cnt)];
|
180
180
|
while (phsc->more) {
|
181
181
|
/* find doc with all the terms */
|
182
|
-
while (phsc->more && first->
|
182
|
+
while (phsc->more && first->doc_num < last->doc_num) {
|
183
183
|
/* skip first upto last */
|
184
|
-
phsc->more = pp_skip_to(first, last->
|
184
|
+
phsc->more = pp_skip_to(first, last->doc_num);
|
185
185
|
last = first;
|
186
186
|
pp_first_idx = FRT_NEXT_NUM(pp_first_idx, pp_cnt);
|
187
187
|
first = phrase_positions[pp_first_idx];
|
@@ -200,7 +200,7 @@ static bool phsc_do_next(FrtScorer *self) {
|
|
200
200
|
last = phrase_positions[FRT_PREV_NUM(pp_first_idx, pp_cnt)];
|
201
201
|
phsc->more = pp_next(last); /* trigger further scanning */
|
202
202
|
} else {
|
203
|
-
self->
|
203
|
+
self->doc_num = first->doc_num;
|
204
204
|
return true; /* found a match */
|
205
205
|
}
|
206
206
|
|
@@ -215,7 +215,7 @@ static float phsc_score(FrtScorer *self) {
|
|
215
215
|
/* normalize */
|
216
216
|
return raw_score * frt_sim_decode_norm(
|
217
217
|
self->similarity,
|
218
|
-
phsc->norms[self->
|
218
|
+
phsc->norms[self->doc_num]);
|
219
219
|
}
|
220
220
|
|
221
221
|
static bool phsc_next(FrtScorer *self) {
|
@@ -253,7 +253,7 @@ static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num) {
|
|
253
253
|
|
254
254
|
phsc_skip_to(self, doc_num);
|
255
255
|
|
256
|
-
phrase_freq = (self->
|
256
|
+
phrase_freq = (self->doc_num == doc_num) ? phsc->freq : 0.0f;
|
257
257
|
return frt_expl_new(frt_sim_tf(self->similarity, phrase_freq),
|
258
258
|
"tf(phrase_freq=%f)", phrase_freq);
|
259
259
|
}
|
@@ -186,7 +186,7 @@ static char *frt_rfilt_to_s(FrtFilter *filt) {
|
|
186
186
|
}
|
187
187
|
|
188
188
|
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
189
|
-
FrtBitVector *bv = frt_bv_new_capa(ir->
|
189
|
+
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc_num(ir));
|
190
190
|
FrtRange *range = RF(filt)->range;
|
191
191
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|
192
192
|
/* the field info exists we need to add docs to the bit vector, otherwise
|
@@ -318,7 +318,7 @@ static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
|
318
318
|
if ((!lt || (sscanf(lt, "%lg%n", &lnum, &len) && (int)strlen(lt) == len)) &&
|
319
319
|
(!ut || (sscanf(ut, "%lg%n", &unum, &len) && (int)strlen(ut) == len)))
|
320
320
|
{
|
321
|
-
FrtBitVector *bv = frt_bv_new_capa(ir->
|
321
|
+
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc_num(ir));
|
322
322
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|
323
323
|
/* the field info exists we need to add docs to the bit vector,
|
324
324
|
* otherwise we just return an empty bit vector */
|