isomorfeus-ferret 0.17.3 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
- data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
- data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
- data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
- data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/index/index.rb +7 -7
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +12 -6
@@ -62,11 +62,11 @@ static FrtMatchVector *mv_to_term_mv(FrtMatchVector *term_mv, FrtMatchVector *fu
|
|
62
62
|
|
63
63
|
typedef struct TVTermDocEnum {
|
64
64
|
FrtTermDocEnum super;
|
65
|
-
int
|
66
|
-
int
|
67
|
-
int
|
68
|
-
int
|
69
|
-
FrtTermVector
|
65
|
+
int doc_num;
|
66
|
+
int index;
|
67
|
+
int freq;
|
68
|
+
int *positions;
|
69
|
+
FrtTermVector *tv;
|
70
70
|
} TVTermDocEnum;
|
71
71
|
|
72
72
|
static void tv_tde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
@@ -74,31 +74,31 @@ static void tv_tde_seek(FrtTermDocEnum *tde, int field_num, const char *term) {
|
|
74
74
|
FrtTVTerm *tv_term = frt_tv_get_tv_term(tv_tde->tv, term);
|
75
75
|
(void)field_num;
|
76
76
|
if (tv_term) {
|
77
|
-
tv_tde->
|
77
|
+
tv_tde->doc_num = -1;
|
78
78
|
tv_tde->index = 0;
|
79
79
|
tv_tde->freq = tv_term->freq;
|
80
80
|
tv_tde->positions = tv_term->positions;
|
81
81
|
} else {
|
82
|
-
tv_tde->
|
82
|
+
tv_tde->doc_num = INT_MAX;
|
83
83
|
}
|
84
84
|
}
|
85
85
|
|
86
86
|
static bool tv_tde_next(FrtTermDocEnum *tde) {
|
87
|
-
if (TV_TDE(tde)->
|
88
|
-
TV_TDE(tde)->
|
87
|
+
if (TV_TDE(tde)->doc_num == -1) {
|
88
|
+
TV_TDE(tde)->doc_num = 0;
|
89
89
|
return true;
|
90
90
|
} else {
|
91
|
-
TV_TDE(tde)->
|
91
|
+
TV_TDE(tde)->doc_num = INT_MAX;
|
92
92
|
return false;
|
93
93
|
}
|
94
94
|
}
|
95
95
|
|
96
96
|
static bool tv_tde_skip_to(FrtTermDocEnum *tde, int doc_num) {
|
97
97
|
if (doc_num == 0) {
|
98
|
-
TV_TDE(tde)->
|
98
|
+
TV_TDE(tde)->doc_num = 0;
|
99
99
|
return true;
|
100
100
|
} else {
|
101
|
-
TV_TDE(tde)->
|
101
|
+
TV_TDE(tde)->doc_num = INT_MAX;
|
102
102
|
return false;
|
103
103
|
}
|
104
104
|
}
|
@@ -112,7 +112,7 @@ static int tv_tde_freq(FrtTermDocEnum *tde) {
|
|
112
112
|
}
|
113
113
|
|
114
114
|
static int tv_tde_doc_num(FrtTermDocEnum *tde) {
|
115
|
-
return TV_TDE(tde)->
|
115
|
+
return TV_TDE(tde)->doc_num;
|
116
116
|
}
|
117
117
|
|
118
118
|
static FrtTermDocEnum *spanq_ir_term_positions(FrtIndexReader *ir) {
|
@@ -167,12 +167,12 @@ static FrtMatchVector *spanq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, Fr
|
|
167
167
|
|
168
168
|
#define SpSc(scorer) ((SpanScorer *)(scorer))
|
169
169
|
typedef struct SpanScorer {
|
170
|
-
FrtScorer
|
171
|
-
FrtIndexReader
|
172
|
-
FrtSpanEnum
|
173
|
-
FrtSimilarity
|
174
|
-
frt_uchar
|
175
|
-
FrtWeight
|
170
|
+
FrtScorer super;
|
171
|
+
FrtIndexReader *ir;
|
172
|
+
FrtSpanEnum *spans;
|
173
|
+
FrtSimilarity *sim;
|
174
|
+
frt_uchar *norms;
|
175
|
+
FrtWeight *weight;
|
176
176
|
float value;
|
177
177
|
float freq;
|
178
178
|
bool first_time : 1;
|
@@ -184,7 +184,7 @@ static float spansc_score(FrtScorer *self) {
|
|
184
184
|
float raw = frt_sim_tf(spansc->sim, spansc->freq) * spansc->value;
|
185
185
|
|
186
186
|
/* normalize */
|
187
|
-
return raw * frt_sim_decode_norm(self->similarity, spansc->norms[self->
|
187
|
+
return raw * frt_sim_decode_norm(self->similarity, spansc->norms[self->doc_num]);
|
188
188
|
}
|
189
189
|
|
190
190
|
static bool spansc_next(FrtScorer *self) {
|
@@ -202,13 +202,13 @@ static bool spansc_next(FrtScorer *self) {
|
|
202
202
|
}
|
203
203
|
|
204
204
|
spansc->freq = 0.0f;
|
205
|
-
self->
|
205
|
+
self->doc_num = se->doc_num(se);
|
206
206
|
|
207
207
|
do {
|
208
208
|
match_length = se->end(se) - se->start(se);
|
209
209
|
spansc->freq += frt_sim_sloppy_freq(spansc->sim, match_length);
|
210
210
|
spansc->more = se->next(se);
|
211
|
-
} while (spansc->more && (self->
|
211
|
+
} while (spansc->more && (self->doc_num == se->doc_num(se)));
|
212
212
|
|
213
213
|
return (spansc->more || (spansc->freq != 0.0));
|
214
214
|
}
|
@@ -223,9 +223,9 @@ static bool spansc_skip_to(FrtScorer *self, int target) {
|
|
223
223
|
}
|
224
224
|
|
225
225
|
spansc->freq = 0.0f;
|
226
|
-
self->
|
226
|
+
self->doc_num = se->doc_num(se);
|
227
227
|
|
228
|
-
while (spansc->more && (se->
|
228
|
+
while (spansc->more && (se->doc_num(se) == target)) {
|
229
229
|
spansc->freq += frt_sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
|
230
230
|
spansc->more = se->next(se);
|
231
231
|
if (spansc->first_time) {
|
@@ -241,7 +241,7 @@ static FrtExplanation *spansc_explain(FrtScorer *self, int target) {
|
|
241
241
|
SpanScorer *spansc = SpSc(self);
|
242
242
|
float phrase_freq;
|
243
243
|
self->skip_to(self, target);
|
244
|
-
phrase_freq = (self->
|
244
|
+
phrase_freq = (self->doc_num == target) ? spansc->freq : (float)0.0;
|
245
245
|
|
246
246
|
tf_explanation = frt_expl_new(frt_sim_tf(self->similarity, phrase_freq),
|
247
247
|
"tf(phrase_freq(%f)", phrase_freq);
|
@@ -264,20 +264,20 @@ static FrtScorer *spansc_new(FrtWeight *weight, FrtIndexReader *ir) {
|
|
264
264
|
FrtQuery *spanq = weight->query;
|
265
265
|
self = frt_scorer_new(SpanScorer, weight->similarity);
|
266
266
|
|
267
|
-
SpSc(self)->first_time
|
268
|
-
SpSc(self)->more
|
269
|
-
SpSc(self)->spans
|
270
|
-
SpSc(self)->sim
|
271
|
-
SpSc(self)->norms
|
272
|
-
SpSc(self)->weight
|
273
|
-
SpSc(self)->value
|
274
|
-
SpSc(self)->freq
|
275
|
-
|
276
|
-
self->score
|
277
|
-
self->next
|
278
|
-
self->skip_to
|
279
|
-
self->explain
|
280
|
-
self->destroy
|
267
|
+
SpSc(self)->first_time = true;
|
268
|
+
SpSc(self)->more = true;
|
269
|
+
SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
|
270
|
+
SpSc(self)->sim = weight->similarity;
|
271
|
+
SpSc(self)->norms = ir->get_norms(ir, field_num);
|
272
|
+
SpSc(self)->weight = weight;
|
273
|
+
SpSc(self)->value = weight->value;
|
274
|
+
SpSc(self)->freq = 0.0f;
|
275
|
+
|
276
|
+
self->score = &spansc_score;
|
277
|
+
self->next = &spansc_next;
|
278
|
+
self->skip_to = &spansc_skip_to;
|
279
|
+
self->explain = &spansc_explain;
|
280
|
+
self->destroy = &spansc_destroy;
|
281
281
|
}
|
282
282
|
return self;
|
283
283
|
}
|
@@ -290,25 +290,24 @@ static FrtScorer *spansc_new(FrtWeight *weight, FrtIndexReader *ir) {
|
|
290
290
|
#define SpTQ(query) ((FrtSpanTermQuery *)(query))
|
291
291
|
|
292
292
|
typedef struct SpanTermEnum {
|
293
|
-
FrtSpanEnum
|
293
|
+
FrtSpanEnum super;
|
294
294
|
FrtTermDocEnum *positions;
|
295
|
-
int
|
296
|
-
int
|
297
|
-
int
|
298
|
-
int
|
295
|
+
int position;
|
296
|
+
int doc_num;
|
297
|
+
int count;
|
298
|
+
int freq;
|
299
299
|
} SpanTermEnum;
|
300
300
|
|
301
|
-
|
302
301
|
static bool spante_next(FrtSpanEnum *self) {
|
303
302
|
SpanTermEnum *ste = SpTEn(self);
|
304
303
|
FrtTermDocEnum *tde = ste->positions;
|
305
304
|
|
306
305
|
if (ste->count == ste->freq) {
|
307
306
|
if (! tde->next(tde)) {
|
308
|
-
ste->
|
307
|
+
ste->doc_num = INT_MAX;
|
309
308
|
return false;
|
310
309
|
}
|
311
|
-
ste->
|
310
|
+
ste->doc_num = tde->doc_num(tde);
|
312
311
|
ste->freq = tde->freq(tde);
|
313
312
|
ste->count = 0;
|
314
313
|
}
|
@@ -330,11 +329,11 @@ static bool spante_skip_to(FrtSpanEnum *self, int target) {
|
|
330
329
|
*/
|
331
330
|
|
332
331
|
if (! tde->skip_to(tde, target)) {
|
333
|
-
ste->
|
332
|
+
ste->doc_num = INT_MAX;
|
334
333
|
return false;
|
335
334
|
}
|
336
335
|
|
337
|
-
ste->
|
336
|
+
ste->doc_num = tde->doc_num(tde);
|
338
337
|
ste->freq = tde->freq(tde);
|
339
338
|
ste->count = 0;
|
340
339
|
|
@@ -343,8 +342,8 @@ static bool spante_skip_to(FrtSpanEnum *self, int target) {
|
|
343
342
|
return true;
|
344
343
|
}
|
345
344
|
|
346
|
-
static int
|
347
|
-
return SpTEn(self)->
|
345
|
+
static int spante_doc_num(FrtSpanEnum *self) {
|
346
|
+
return SpTEn(self)->doc_num;
|
348
347
|
}
|
349
348
|
|
350
349
|
static int spante_start(FrtSpanEnum *self) {
|
@@ -362,14 +361,14 @@ static char *spante_to_s(FrtSpanEnum *self) {
|
|
362
361
|
int pos;
|
363
362
|
char *str = FRT_ALLOC_N(char, len + 40);
|
364
363
|
|
365
|
-
if (self->
|
364
|
+
if (self->doc_num(self) < 0) {
|
366
365
|
sprintf(pos_str, "START");
|
367
366
|
} else {
|
368
|
-
if (self->
|
367
|
+
if (self->doc_num(self) == INT_MAX) {
|
369
368
|
sprintf(pos_str, "END");
|
370
369
|
} else {
|
371
370
|
pos = SpTEn(self)->position;
|
372
|
-
sprintf(pos_str, "%d", self->
|
371
|
+
sprintf(pos_str, "%d", self->doc_num(self) - pos);
|
373
372
|
}
|
374
373
|
}
|
375
374
|
sprintf(str, "SpanTermEnum(%s)@%s", query_str, pos_str);
|
@@ -387,21 +386,21 @@ static FrtSpanEnum *spante_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
387
386
|
char *term = SpTQ(query)->term;
|
388
387
|
FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanTermEnum);
|
389
388
|
|
390
|
-
SpTEn(self)->positions
|
389
|
+
SpTEn(self)->positions = frt_ir_term_positions_for(ir, SpQ(query)->field,
|
391
390
|
term);
|
392
|
-
SpTEn(self)->position
|
393
|
-
SpTEn(self)->
|
394
|
-
SpTEn(self)->count
|
395
|
-
SpTEn(self)->freq
|
396
|
-
|
397
|
-
self->query
|
398
|
-
self->next
|
399
|
-
self->skip_to
|
400
|
-
self->
|
401
|
-
self->start
|
402
|
-
self->end
|
403
|
-
self->destroy
|
404
|
-
self->to_s
|
391
|
+
SpTEn(self)->position = -1;
|
392
|
+
SpTEn(self)->doc_num = -1;
|
393
|
+
SpTEn(self)->count = 0;
|
394
|
+
SpTEn(self)->freq = 0;
|
395
|
+
|
396
|
+
self->query = query;
|
397
|
+
self->next = &spante_next;
|
398
|
+
self->skip_to = &spante_skip_to;
|
399
|
+
self->doc_num = &spante_doc_num;
|
400
|
+
self->start = &spante_start;
|
401
|
+
self->end = &spante_end;
|
402
|
+
self->destroy = &spante_destroy;
|
403
|
+
self->to_s = &spante_to_s;
|
405
404
|
|
406
405
|
return self;
|
407
406
|
}
|
@@ -414,23 +413,23 @@ static FrtSpanEnum *spante_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
414
413
|
#define TPE_READ_SIZE 16
|
415
414
|
|
416
415
|
typedef struct TermPosEnumWrapper {
|
417
|
-
const char
|
416
|
+
const char *term;
|
418
417
|
FrtTermDocEnum *tpe;
|
419
|
-
int
|
420
|
-
int
|
418
|
+
int doc_num;
|
419
|
+
int pos;
|
421
420
|
} TermPosEnumWrapper;
|
422
421
|
|
423
422
|
static bool tpew_less_than(const TermPosEnumWrapper *tpew1,
|
424
423
|
const TermPosEnumWrapper *tpew2) {
|
425
|
-
return (tpew1->
|
426
|
-
|| (tpew1->
|
424
|
+
return (tpew1->doc_num < tpew2->doc_num)
|
425
|
+
|| (tpew1->doc_num == tpew2->doc_num && tpew1->pos < tpew2->pos);
|
427
426
|
}
|
428
427
|
|
429
428
|
static bool tpew_next(TermPosEnumWrapper *self) {
|
430
429
|
FrtTermDocEnum *tpe = self->tpe;
|
431
430
|
if (0 > (self->pos = tpe->next_position(tpe))) {
|
432
431
|
if (!tpe->next(tpe)) return false;
|
433
|
-
self->
|
432
|
+
self->doc_num = tpe->doc_num(tpe);
|
434
433
|
self->pos = tpe->next_position(tpe);
|
435
434
|
}
|
436
435
|
return true;
|
@@ -440,7 +439,7 @@ static bool tpew_skip_to(TermPosEnumWrapper *self, int doc_num) {
|
|
440
439
|
FrtTermDocEnum *tpe = self->tpe;
|
441
440
|
|
442
441
|
if (tpe->skip_to(tpe, doc_num)) {
|
443
|
-
self->
|
442
|
+
self->doc_num = tpe->doc_num(tpe);
|
444
443
|
self->pos = tpe->next_position(tpe);
|
445
444
|
return true;
|
446
445
|
} else {
|
@@ -457,7 +456,7 @@ static TermPosEnumWrapper *tpew_new(const char *term, FrtTermDocEnum *tpe) {
|
|
457
456
|
TermPosEnumWrapper *self = FRT_ALLOC_AND_ZERO(TermPosEnumWrapper);
|
458
457
|
self->term = term;
|
459
458
|
self->tpe = tpe;
|
460
|
-
self->
|
459
|
+
self->doc_num = -1;
|
461
460
|
self->pos = -1;
|
462
461
|
return self;
|
463
462
|
}
|
@@ -465,16 +464,16 @@ static TermPosEnumWrapper *tpew_new(const char *term, FrtTermDocEnum *tpe) {
|
|
465
464
|
#define SpMTQ(query) ((FrtSpanMultiTermQuery *)(query))
|
466
465
|
|
467
466
|
typedef struct SpanMultiTermEnum {
|
468
|
-
FrtSpanEnum
|
469
|
-
FrtPriorityQueue
|
467
|
+
FrtSpanEnum super;
|
468
|
+
FrtPriorityQueue *tpew_pq;
|
470
469
|
TermPosEnumWrapper **tpews;
|
471
|
-
int
|
472
|
-
int
|
473
|
-
int
|
470
|
+
int tpew_cnt;
|
471
|
+
int pos;
|
472
|
+
int doc_num;
|
474
473
|
} SpanMultiTermEnum;
|
475
474
|
|
476
475
|
static bool spanmte_next(FrtSpanEnum *self) {
|
477
|
-
int
|
476
|
+
int curr_doc_num, curr_pos;
|
478
477
|
TermPosEnumWrapper *tpew;
|
479
478
|
SpanMultiTermEnum *mte = SpMTEn(self);
|
480
479
|
FrtPriorityQueue *tpew_pq = mte->tpew_pq;
|
@@ -495,7 +494,7 @@ static bool spanmte_next(FrtSpanEnum *self) {
|
|
495
494
|
return false;
|
496
495
|
}
|
497
496
|
|
498
|
-
mte->
|
497
|
+
mte->doc_num = curr_doc_num = tpew->doc_num;
|
499
498
|
mte->pos = curr_pos = tpew->pos;
|
500
499
|
|
501
500
|
do {
|
@@ -505,7 +504,7 @@ static bool spanmte_next(FrtSpanEnum *self) {
|
|
505
504
|
frt_pq_pop(tpew_pq);
|
506
505
|
}
|
507
506
|
} while (((tpew = (TermPosEnumWrapper *)frt_pq_top(tpew_pq)) != NULL)
|
508
|
-
&& tpew->
|
507
|
+
&& tpew->doc_num == curr_doc_num && tpew->pos == curr_pos);
|
509
508
|
return true;
|
510
509
|
}
|
511
510
|
|
@@ -524,11 +523,11 @@ static bool spanmte_skip_to(FrtSpanEnum *self, int target) {
|
|
524
523
|
mte->tpew_pq = tpew_pq;
|
525
524
|
}
|
526
525
|
if (tpew_pq->size == 0) {
|
527
|
-
mte->
|
526
|
+
mte->doc_num = -1;
|
528
527
|
return false;
|
529
528
|
}
|
530
529
|
while ((tpew = (TermPosEnumWrapper *)frt_pq_top(tpew_pq)) != NULL
|
531
|
-
&& (target > tpew->
|
530
|
+
&& (target > tpew->doc_num)) {
|
532
531
|
if (tpew_skip_to(tpew, target)) {
|
533
532
|
frt_pq_down(tpew_pq);
|
534
533
|
} else {
|
@@ -538,8 +537,8 @@ static bool spanmte_skip_to(FrtSpanEnum *self, int target) {
|
|
538
537
|
return spanmte_next(self);
|
539
538
|
}
|
540
539
|
|
541
|
-
static int
|
542
|
-
return SpMTEn(self)->
|
540
|
+
static int spanmte_doc_num(FrtSpanEnum *self) {
|
541
|
+
return SpMTEn(self)->doc_num;
|
543
542
|
}
|
544
543
|
|
545
544
|
static int spanmte_start(FrtSpanEnum *self) {
|
@@ -577,12 +576,12 @@ static FrtSpanEnum *spanmte_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
577
576
|
smte->tpew_cnt = smtq->term_cnt;
|
578
577
|
smte->tpew_pq = NULL;
|
579
578
|
smte->pos = -1;
|
580
|
-
smte->
|
579
|
+
smte->doc_num = -1;
|
581
580
|
|
582
581
|
self->query = query;
|
583
582
|
self->next = &spanmte_next;
|
584
583
|
self->skip_to = &spanmte_skip_to;
|
585
|
-
self->
|
584
|
+
self->doc_num = &spanmte_doc_num;
|
586
585
|
self->start = &spanmte_start;
|
587
586
|
self->end = &spanmte_end;
|
588
587
|
self->destroy = &spanmte_destroy;
|
@@ -631,9 +630,9 @@ static bool spanfe_skip_to(FrtSpanEnum *self, int target) {
|
|
631
630
|
return spanfe_next(self); /* scan to next match */
|
632
631
|
}
|
633
632
|
|
634
|
-
static int
|
633
|
+
static int spanfe_doc_num(FrtSpanEnum *self) {
|
635
634
|
FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
|
636
|
-
return sub_enum->
|
635
|
+
return sub_enum->doc_num(sub_enum);
|
637
636
|
}
|
638
637
|
|
639
638
|
static int spanfe_start(FrtSpanEnum *self) {
|
@@ -668,7 +667,7 @@ static FrtSpanEnum *spanfe_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
668
667
|
self->query = query;
|
669
668
|
self->next = &spanfe_next;
|
670
669
|
self->skip_to = &spanfe_skip_to;
|
671
|
-
self->
|
670
|
+
self->doc_num = &spanfe_doc_num;
|
672
671
|
self->start = &spanfe_start;
|
673
672
|
self->end = &spanfe_end;
|
674
673
|
self->destroy = &spanfe_destroy;
|
@@ -696,7 +695,7 @@ typedef struct SpanOrEnum {
|
|
696
695
|
|
697
696
|
static bool span_less_than(FrtSpanEnum *s1, FrtSpanEnum *s2) {
|
698
697
|
int doc_diff, start_diff;
|
699
|
-
doc_diff = s1->
|
698
|
+
doc_diff = s1->doc_num(s1) - s2->doc_num(s2);
|
700
699
|
if (doc_diff == 0) {
|
701
700
|
start_diff = s1->start(s1) - s2->start(s2);
|
702
701
|
if (start_diff == 0) {
|
@@ -756,7 +755,7 @@ static bool spanoe_skip_to(FrtSpanEnum *self, int target) {
|
|
756
755
|
} else {
|
757
756
|
while ((soe->queue->size != 0) &&
|
758
757
|
((se = (FrtSpanEnum *)frt_pq_top(soe->queue)) != NULL) &&
|
759
|
-
(se->
|
758
|
+
(se->doc_num(se) < target)) {
|
760
759
|
if (se->skip_to(se, target)) {
|
761
760
|
frt_pq_down(soe->queue);
|
762
761
|
} else {
|
@@ -770,9 +769,9 @@ static bool spanoe_skip_to(FrtSpanEnum *self, int target) {
|
|
770
769
|
|
771
770
|
#define SpOEn_Top_SE(self) (FrtSpanEnum *)frt_pq_top(SpOEn(self)->queue)
|
772
771
|
|
773
|
-
static int
|
772
|
+
static int spanoe_doc_num(FrtSpanEnum *self) {
|
774
773
|
FrtSpanEnum *se = SpOEn_Top_SE(self);
|
775
|
-
return se->
|
774
|
+
return se->doc_num(se);
|
776
775
|
}
|
777
776
|
|
778
777
|
static int spanoe_start(FrtSpanEnum *self) {
|
@@ -798,7 +797,7 @@ static char *spanoe_to_s(FrtSpanEnum *self) {
|
|
798
797
|
if (soe->queue->size == 0) {
|
799
798
|
sprintf(doc_str, "END");
|
800
799
|
} else {
|
801
|
-
sprintf(doc_str, "%d:%d-%d", self->
|
800
|
+
sprintf(doc_str, "%d:%d-%d", self->doc_num(self),
|
802
801
|
self->start(self), self->end(self));
|
803
802
|
}
|
804
803
|
}
|
@@ -841,7 +840,7 @@ static FrtSpanEnum *spanoe_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
841
840
|
self->query = query;
|
842
841
|
self->next = &spanoe_next;
|
843
842
|
self->skip_to = &spanoe_skip_to;
|
844
|
-
self->
|
843
|
+
self->doc_num = &spanoe_doc_num;
|
845
844
|
self->start = &spanoe_start;
|
846
845
|
self->end = &spanoe_end;
|
847
846
|
self->destroy = &spanoe_destroy;
|
@@ -858,12 +857,12 @@ static FrtSpanEnum *spanoe_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
858
857
|
#define SpNQ(query) ((FrtSpanNearQuery *)(query))
|
859
858
|
|
860
859
|
typedef struct SpanNearEnum {
|
861
|
-
FrtSpanEnum
|
862
|
-
FrtSpanEnum
|
860
|
+
FrtSpanEnum super;
|
861
|
+
FrtSpanEnum **span_enums;
|
863
862
|
int s_cnt;
|
864
863
|
int slop;
|
865
864
|
int current;
|
866
|
-
int
|
865
|
+
int doc_num;
|
867
866
|
int start;
|
868
867
|
int end;
|
869
868
|
bool first_time : 1;
|
@@ -878,30 +877,30 @@ typedef struct SpanNearEnum {
|
|
878
877
|
|
879
878
|
static bool sne_init(SpanNearEnum *sne) {
|
880
879
|
FrtSpanEnum *se = sne->span_enums[sne->current];
|
881
|
-
int
|
880
|
+
int prev_doc_num = se->doc_num(se);
|
882
881
|
int i;
|
883
882
|
|
884
883
|
for (i = 1; i < sne->s_cnt; i++) {
|
885
884
|
SpNEn_NEXT();
|
886
|
-
if (!se->skip_to(se,
|
885
|
+
if (!se->skip_to(se, prev_doc_num)) {
|
887
886
|
return false;
|
888
887
|
}
|
889
|
-
|
888
|
+
prev_doc_num = se->doc_num(se);
|
890
889
|
}
|
891
890
|
return true;
|
892
891
|
}
|
893
892
|
|
894
893
|
static bool sne_goto_next_doc(SpanNearEnum *sne) {
|
895
894
|
FrtSpanEnum *se = sne->span_enums[sne->current];
|
896
|
-
int
|
895
|
+
int prev_doc_num = se->doc_num(se);
|
897
896
|
|
898
897
|
SpNEn_NEXT();
|
899
898
|
|
900
|
-
while (se->
|
901
|
-
if (! se->skip_to(se,
|
899
|
+
while (se->doc_num(se) < prev_doc_num) {
|
900
|
+
if (! se->skip_to(se, prev_doc_num)) {
|
902
901
|
return false;
|
903
902
|
}
|
904
|
-
|
903
|
+
prev_doc_num = se->doc_num(se);
|
905
904
|
SpNEn_NEXT();
|
906
905
|
}
|
907
906
|
return true;
|
@@ -911,7 +910,7 @@ static bool sne_next_unordered_match(FrtSpanEnum *self) {
|
|
911
910
|
SpanNearEnum *sne = SpNEn(self);
|
912
911
|
FrtSpanEnum *se, *min_se = NULL;
|
913
912
|
int i;
|
914
|
-
int max_end, end, min_start, start,
|
913
|
+
int max_end, end, min_start, start, doc_num;
|
915
914
|
int lengths_sum;
|
916
915
|
|
917
916
|
while (true) {
|
@@ -936,16 +935,16 @@ static bool sne_next_unordered_match(FrtSpanEnum *self) {
|
|
936
935
|
/* we have a match */
|
937
936
|
sne->start = min_start;
|
938
937
|
sne->end = max_end;
|
939
|
-
sne->
|
938
|
+
sne->doc_num = min_se->doc_num(min_se);
|
940
939
|
return true;
|
941
940
|
}
|
942
941
|
|
943
942
|
/* increment the minimum span_enum and try again */
|
944
|
-
|
943
|
+
doc_num = min_se->doc_num(min_se);
|
945
944
|
if (!min_se->next(min_se)) {
|
946
945
|
return false;
|
947
946
|
}
|
948
|
-
if (
|
947
|
+
if (doc_num < min_se->doc_num(min_se)) {
|
949
948
|
if (!sne_goto_next_doc(sne)) return false;
|
950
949
|
}
|
951
950
|
}
|
@@ -955,14 +954,14 @@ static bool sne_next_ordered_match(FrtSpanEnum *self) {
|
|
955
954
|
SpanNearEnum *sne = SpNEn(self);
|
956
955
|
FrtSpanEnum *se;
|
957
956
|
int i;
|
958
|
-
int
|
959
|
-
int
|
957
|
+
int prev_doc_num, prev_start, prev_end;
|
958
|
+
int doc_num = 0, start = 0, end = 0;
|
960
959
|
int lengths_sum;
|
961
960
|
|
962
961
|
while (true) {
|
963
962
|
se = sne->span_enums[0];
|
964
963
|
|
965
|
-
|
964
|
+
prev_doc_num = se->doc_num(se);
|
966
965
|
sne->start = prev_start = se->start(se);
|
967
966
|
prev_end = se->end(se);
|
968
967
|
|
@@ -971,19 +970,19 @@ static bool sne_next_ordered_match(FrtSpanEnum *self) {
|
|
971
970
|
|
972
971
|
while (i < sne->s_cnt) {
|
973
972
|
se = sne->span_enums[i];
|
974
|
-
|
973
|
+
doc_num = se->doc_num(se);
|
975
974
|
start = se->start(se);
|
976
975
|
end = se->end(se);
|
977
|
-
while ((
|
976
|
+
while ((doc_num == prev_doc_num) && ((start < prev_start) ||
|
978
977
|
((start == prev_start) && (end < prev_end)))) {
|
979
978
|
if (!se->next(se)) {
|
980
979
|
return false;
|
981
980
|
}
|
982
|
-
|
981
|
+
doc_num = se->doc_num(se);
|
983
982
|
start = se->start(se);
|
984
983
|
end = se->end(se);
|
985
984
|
}
|
986
|
-
if (
|
985
|
+
if (doc_num != prev_doc_num) {
|
987
986
|
sne->current = i;
|
988
987
|
if (!sne_goto_next_doc(sne)) {
|
989
988
|
return false;
|
@@ -992,7 +991,7 @@ static bool sne_next_ordered_match(FrtSpanEnum *self) {
|
|
992
991
|
}
|
993
992
|
i++;
|
994
993
|
lengths_sum += end - start;
|
995
|
-
|
994
|
+
prev_doc_num = doc_num;
|
996
995
|
prev_start = start;
|
997
996
|
prev_end = end;
|
998
997
|
}
|
@@ -1000,7 +999,7 @@ static bool sne_next_ordered_match(FrtSpanEnum *self) {
|
|
1000
999
|
if ((end - sne->start - lengths_sum) <= sne->slop) {
|
1001
1000
|
/* we have a match */
|
1002
1001
|
sne->end = end;
|
1003
|
-
sne->
|
1002
|
+
sne->doc_num = doc_num;
|
1004
1003
|
|
1005
1004
|
/* the minimum span is always the first span so it needs to be
|
1006
1005
|
* incremented next time around */
|
@@ -1012,7 +1011,7 @@ static bool sne_next_ordered_match(FrtSpanEnum *self) {
|
|
1012
1011
|
if (!se->next(se)) {
|
1013
1012
|
return false;
|
1014
1013
|
}
|
1015
|
-
if (se->
|
1014
|
+
if (se->doc_num(se) != prev_doc_num) {
|
1016
1015
|
sne->current = 0;
|
1017
1016
|
if (!sne_goto_next_doc(sne)) {
|
1018
1017
|
return false;
|
@@ -1035,7 +1034,7 @@ static bool sne_next_match(FrtSpanEnum *self) {
|
|
1035
1034
|
}
|
1036
1035
|
se_curr = sne->span_enums[sne->current];
|
1037
1036
|
se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
|
1038
|
-
if (se_curr->
|
1037
|
+
if (se_curr->doc_num(se_curr) > se_next->doc_num(se_next)) {
|
1039
1038
|
if (!sne_goto_next_doc(sne)) {
|
1040
1039
|
return false;
|
1041
1040
|
}
|
@@ -1067,8 +1066,8 @@ static bool spanne_skip_to(FrtSpanEnum *self, int target) {
|
|
1067
1066
|
return sne_next_match(self);
|
1068
1067
|
}
|
1069
1068
|
|
1070
|
-
static int
|
1071
|
-
return SpNEn(self)->
|
1069
|
+
static int spanne_doc_num(FrtSpanEnum *self) {
|
1070
|
+
return SpNEn(self)->doc_num;
|
1072
1071
|
}
|
1073
1072
|
|
1074
1073
|
static int spanne_start(FrtSpanEnum *self) {
|
@@ -1089,7 +1088,7 @@ static char *spanne_to_s(FrtSpanEnum *self) {
|
|
1089
1088
|
if (sne->first_time) {
|
1090
1089
|
sprintf(doc_str, "START");
|
1091
1090
|
} else {
|
1092
|
-
sprintf(doc_str, "%d:%d-%d", self->
|
1091
|
+
sprintf(doc_str, "%d:%d-%d", self->doc_num(self),
|
1093
1092
|
self->start(self), self->end(self));
|
1094
1093
|
}
|
1095
1094
|
sprintf(str, "SpanNearEnum(%s)@%s", query_str, doc_str);
|
@@ -1112,8 +1111,8 @@ static void spanne_destroy(FrtSpanEnum *self) {
|
|
1112
1111
|
static FrtSpanEnum *spanne_new(FrtQuery *query, FrtIndexReader *ir) {
|
1113
1112
|
int i;
|
1114
1113
|
FrtQuery *clause;
|
1115
|
-
FrtSpanEnum *self
|
1116
|
-
FrtSpanNearQuery *snq
|
1114
|
+
FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanNearEnum);
|
1115
|
+
FrtSpanNearQuery *snq = SpNQ(query);
|
1117
1116
|
|
1118
1117
|
SpNEn(self)->first_time = true;
|
1119
1118
|
SpNEn(self)->in_order = snq->in_order;
|
@@ -1127,14 +1126,14 @@ static FrtSpanEnum *spanne_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
1127
1126
|
}
|
1128
1127
|
SpNEn(self)->current = 0;
|
1129
1128
|
|
1130
|
-
SpNEn(self)->
|
1129
|
+
SpNEn(self)->doc_num = -1;
|
1131
1130
|
SpNEn(self)->start = -1;
|
1132
1131
|
SpNEn(self)->end = -1;
|
1133
1132
|
|
1134
1133
|
self->query = query;
|
1135
1134
|
self->next = &spanne_next;
|
1136
1135
|
self->skip_to = &spanne_skip_to;
|
1137
|
-
self->
|
1136
|
+
self->doc_num = &spanne_doc_num;
|
1138
1137
|
self->start = &spanne_start;
|
1139
1138
|
self->end = &spanne_end;
|
1140
1139
|
self->destroy = &spanne_destroy;
|
@@ -1169,18 +1168,18 @@ static bool spanxe_next(FrtSpanEnum *self) {
|
|
1169
1168
|
}
|
1170
1169
|
|
1171
1170
|
while (sxe->more_inc && sxe->more_exc) {
|
1172
|
-
if (inc->
|
1173
|
-
sxe->more_exc = exc->skip_to(exc, inc->
|
1171
|
+
if (inc->doc_num(inc) > exc->doc_num(exc)) { /* skip excl */
|
1172
|
+
sxe->more_exc = exc->skip_to(exc, inc->doc_num(inc));
|
1174
1173
|
}
|
1175
1174
|
|
1176
1175
|
while (sxe->more_exc /* while excl is before */
|
1177
|
-
&& (inc->
|
1176
|
+
&& (inc->doc_num(inc) == exc->doc_num(exc))
|
1178
1177
|
&& (exc->end(exc) <= inc->start(inc))) {
|
1179
1178
|
sxe->more_exc = exc->next(exc); /* increment excl */
|
1180
1179
|
}
|
1181
1180
|
|
1182
1181
|
if (! sxe->more_exc || /* if no intersection */
|
1183
|
-
(inc->
|
1182
|
+
(inc->doc_num(inc) != exc->doc_num(exc)) ||
|
1184
1183
|
inc->end(inc) <= exc->start(exc)) {
|
1185
1184
|
break; /* we found a match */
|
1186
1185
|
}
|
@@ -1193,24 +1192,24 @@ static bool spanxe_next(FrtSpanEnum *self) {
|
|
1193
1192
|
static bool spanxe_skip_to(FrtSpanEnum *self, int target) {
|
1194
1193
|
SpanNotEnum *sxe = SpXEn(self);
|
1195
1194
|
FrtSpanEnum *inc = sxe->inc, *exc = sxe->exc;
|
1196
|
-
int
|
1195
|
+
int doc_num;
|
1197
1196
|
|
1198
1197
|
if (sxe->more_inc) { /* move to next incl */
|
1199
1198
|
if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
|
1200
1199
|
}
|
1201
1200
|
|
1202
|
-
if (sxe->more_inc && ((
|
1203
|
-
sxe->more_exc = exc->skip_to(exc,
|
1201
|
+
if (sxe->more_inc && ((doc_num = inc->doc_num(inc)) > exc->doc_num(exc))) {
|
1202
|
+
sxe->more_exc = exc->skip_to(exc, doc_num);
|
1204
1203
|
}
|
1205
1204
|
|
1206
1205
|
while (sxe->more_exc /* while excl is before */
|
1207
|
-
&& inc->
|
1206
|
+
&& inc->doc_num(inc) == exc->doc_num(exc)
|
1208
1207
|
&& exc->end(exc) <= inc->start(inc)) {
|
1209
1208
|
sxe->more_exc = exc->next(exc); /* increment excl */
|
1210
1209
|
}
|
1211
1210
|
|
1212
1211
|
if (!sxe->more_exc || /* if no intersection */
|
1213
|
-
inc->
|
1212
|
+
inc->doc_num(inc) != exc->doc_num(exc) ||
|
1214
1213
|
inc->end(inc) <= exc->start(exc)) {
|
1215
1214
|
return true; /* we found a match */
|
1216
1215
|
}
|
@@ -1218,9 +1217,9 @@ static bool spanxe_skip_to(FrtSpanEnum *self, int target) {
|
|
1218
1217
|
return spanxe_next(self); /* scan to next match */
|
1219
1218
|
}
|
1220
1219
|
|
1221
|
-
static int
|
1220
|
+
static int spanxe_doc_num(FrtSpanEnum *self) {
|
1222
1221
|
FrtSpanEnum *inc = SpXEn(self)->inc;
|
1223
|
-
return inc->
|
1222
|
+
return inc->doc_num(inc);
|
1224
1223
|
}
|
1225
1224
|
|
1226
1225
|
static int spanxe_start(FrtSpanEnum *self) {
|
@@ -1248,9 +1247,9 @@ static void spanxe_destroy(FrtSpanEnum *self) {
|
|
1248
1247
|
}
|
1249
1248
|
|
1250
1249
|
static FrtSpanEnum *spanxe_new(FrtQuery *query, FrtIndexReader *ir) {
|
1251
|
-
FrtSpanEnum *self
|
1250
|
+
FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanNotEnum);
|
1252
1251
|
SpanNotEnum *sxe = SpXEn(self);
|
1253
|
-
FrtSpanNotQuery *sxq
|
1252
|
+
FrtSpanNotQuery *sxq = SpXQ(query);
|
1254
1253
|
|
1255
1254
|
sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
|
1256
1255
|
sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
|
@@ -1260,7 +1259,7 @@ static FrtSpanEnum *spanxe_new(FrtQuery *query, FrtIndexReader *ir) {
|
|
1260
1259
|
self->query = query;
|
1261
1260
|
self->next = &spanxe_next;
|
1262
1261
|
self->skip_to = &spanxe_skip_to;
|
1263
|
-
self->
|
1262
|
+
self->doc_num = &spanxe_doc_num;
|
1264
1263
|
self->start = &spanxe_start;
|
1265
1264
|
self->end = &spanxe_end;
|
1266
1265
|
self->destroy = &spanxe_destroy;
|