isomorfeus-ferret 0.17.3 → 0.17.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
- data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
- data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
- data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
- data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/index/index.rb +7 -7
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +11 -19
@@ -84,13 +84,13 @@ static char *qfilt_to_s(FrtFilter *filt) {
|
|
84
84
|
}
|
85
85
|
|
86
86
|
static FrtBitVector *qfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
87
|
-
FrtBitVector *bv = frt_bv_new_capa(ir->
|
87
|
+
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc_num(ir));
|
88
88
|
FrtSearcher *sea = frt_isea_new(ir);
|
89
89
|
FrtWeight *weight = frt_q_weight(QF(filt)->query, sea);
|
90
90
|
FrtScorer *scorer = weight->scorer(weight, ir);
|
91
91
|
if (scorer) {
|
92
92
|
while (scorer->next(scorer)) {
|
93
|
-
frt_bv_set(bv, scorer->
|
93
|
+
frt_bv_set(bv, scorer->doc_num);
|
94
94
|
}
|
95
95
|
scorer->destroy(scorer);
|
96
96
|
}
|
@@ -176,7 +176,7 @@ static void index_del_doc_with_key_i(FrtIndex *self, FrtDocument *doc,
|
|
176
176
|
frt_td_destroy(td);
|
177
177
|
FRT_RAISE(FRT_ARG_ERROR, "%s", NON_UNIQUE_KEY_ERROR_MSG);
|
178
178
|
} else if (td->total_hits == 1) {
|
179
|
-
frt_ir_delete_doc(self->ir, td->hits[0]->
|
179
|
+
frt_ir_delete_doc(self->ir, td->hits[0]->doc_num);
|
180
180
|
}
|
181
181
|
frt_q_deref(q);
|
182
182
|
frt_td_destroy(td);
|
@@ -255,7 +255,7 @@ FrtDocument *frt_index_get_doc_term(FrtIndex *self, ID field, const char *term)
|
|
255
255
|
return doc;
|
256
256
|
}
|
257
257
|
|
258
|
-
FrtDocument *
|
258
|
+
FrtDocument *frt_index_get_doc_num(FrtIndex *self, const char *id) {
|
259
259
|
return frt_index_get_doc_term(self, self->id_field, id);
|
260
260
|
}
|
261
261
|
|
@@ -38,7 +38,7 @@ extern FrtTopDocs *frt_index_search_str(FrtIndex *self, char *query, int first_d
|
|
38
38
|
extern FrtQuery *frt_index_get_query(FrtIndex *self, char *qstr, rb_encoding *encoding);
|
39
39
|
extern FrtDocument *frt_index_get_doc(FrtIndex *self, int doc_num);
|
40
40
|
extern FrtDocument *frt_index_get_doc_ts(FrtIndex *self, int doc_num);
|
41
|
-
extern FrtDocument *
|
41
|
+
extern FrtDocument *frt_index_get_doc_num(FrtIndex *self, const char *id);
|
42
42
|
extern FrtDocument *frt_index_get_doc_term(FrtIndex *self, ID field, const char *term);
|
43
43
|
extern void frt_index_delete(FrtIndex *self, int doc_num);
|
44
44
|
extern void frt_index_delete_term(FrtIndex *self, ID field, const char *term);
|
@@ -1898,7 +1898,7 @@ static void stde_seek_ti(FrtSegmentTermDocEnum *stde, FrtTermInfo *ti) {
|
|
1898
1898
|
stde->count = 0;
|
1899
1899
|
stde->doc_freq = ti->doc_freq;
|
1900
1900
|
stde->doc_num = 0;
|
1901
|
-
stde->
|
1901
|
+
stde->skip_doc_num = 0;
|
1902
1902
|
stde->skip_count = 0;
|
1903
1903
|
stde->num_skips = stde->doc_freq / stde->skip_interval;
|
1904
1904
|
stde->frq_ptr = ti->frq_ptr;
|
@@ -1934,7 +1934,6 @@ static int stde_freq(FrtTermDocEnum *tde) {
|
|
1934
1934
|
}
|
1935
1935
|
|
1936
1936
|
static bool stde_next(FrtTermDocEnum *tde) {
|
1937
|
-
int doc_code;
|
1938
1937
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
1939
1938
|
|
1940
1939
|
while (true) {
|
@@ -1942,13 +1941,8 @@ static bool stde_next(FrtTermDocEnum *tde) {
|
|
1942
1941
|
return false;
|
1943
1942
|
}
|
1944
1943
|
|
1945
|
-
|
1946
|
-
stde->
|
1947
|
-
if (0 != (doc_code & 1)) { /* if low bit is set */
|
1948
|
-
stde->freq = 1; /* freq is one */
|
1949
|
-
} else {
|
1950
|
-
stde->freq = (int)frt_is_read_vint(stde->frq_in); /* read freq */
|
1951
|
-
}
|
1944
|
+
stde->doc_num += frt_is_read_vint(stde->frq_in);
|
1945
|
+
stde->freq = frt_is_read_vint(stde->frq_in);
|
1952
1946
|
|
1953
1947
|
stde->count++;
|
1954
1948
|
|
@@ -1964,17 +1958,11 @@ static bool stde_next(FrtTermDocEnum *tde) {
|
|
1964
1958
|
static int stde_read(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num) {
|
1965
1959
|
FrtSegmentTermDocEnum *stde = STDE(tde);
|
1966
1960
|
int i = 0;
|
1967
|
-
int doc_code;
|
1968
1961
|
|
1969
1962
|
while (i < req_num && stde->count < stde->doc_freq) {
|
1970
1963
|
/* manually inlined call to next() for speed */
|
1971
|
-
|
1972
|
-
stde->
|
1973
|
-
if (0 != (doc_code & 1)) { /* if low bit is set */
|
1974
|
-
stde->freq = 1; /* freq is one */
|
1975
|
-
} else {
|
1976
|
-
stde->freq = frt_is_read_vint(stde->frq_in); /* else read freq */
|
1977
|
-
}
|
1964
|
+
stde->doc_num += frt_is_read_vint(stde->frq_in);
|
1965
|
+
stde->freq = frt_is_read_vint(stde->frq_in);
|
1978
1966
|
|
1979
1967
|
stde->count++;
|
1980
1968
|
|
@@ -1993,7 +1981,7 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
|
1993
1981
|
|
1994
1982
|
if (stde->doc_freq >= stde->skip_interval
|
1995
1983
|
&& target_doc_num > stde->doc_num) { /* optimized case */
|
1996
|
-
int
|
1984
|
+
int last_skip_doc_num;
|
1997
1985
|
frt_off_t last_frq_ptr;
|
1998
1986
|
frt_off_t last_prx_ptr;
|
1999
1987
|
int num_skipped;
|
@@ -2008,17 +1996,17 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
|
2008
1996
|
}
|
2009
1997
|
|
2010
1998
|
/* scan skip data */
|
2011
|
-
|
1999
|
+
last_skip_doc_num = stde->skip_doc_num;
|
2012
2000
|
last_frq_ptr = frt_is_pos(stde->frq_in);
|
2013
2001
|
last_prx_ptr = -1;
|
2014
2002
|
num_skipped = -1 - (stde->count % stde->skip_interval);
|
2015
2003
|
|
2016
|
-
while (target_doc_num > stde->
|
2017
|
-
|
2004
|
+
while (target_doc_num > stde->skip_doc_num) {
|
2005
|
+
last_skip_doc_num = stde->skip_doc_num;
|
2018
2006
|
last_frq_ptr = stde->frq_ptr;
|
2019
2007
|
last_prx_ptr = stde->prx_ptr;
|
2020
2008
|
|
2021
|
-
if (0 != stde->
|
2009
|
+
if (0 != stde->skip_doc_num && stde->skip_doc_num >= stde->doc_num) {
|
2022
2010
|
num_skipped += stde->skip_interval;
|
2023
2011
|
}
|
2024
2012
|
|
@@ -2026,9 +2014,9 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
|
2026
2014
|
break;
|
2027
2015
|
}
|
2028
2016
|
|
2029
|
-
stde->
|
2030
|
-
stde->frq_ptr
|
2031
|
-
stde->prx_ptr
|
2017
|
+
stde->skip_doc_num += frt_is_read_vint(stde->skip_in);
|
2018
|
+
stde->frq_ptr += frt_is_read_vint(stde->skip_in);
|
2019
|
+
stde->prx_ptr += frt_is_read_vint(stde->skip_in);
|
2032
2020
|
|
2033
2021
|
stde->skip_count++;
|
2034
2022
|
}
|
@@ -2038,7 +2026,7 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
|
2038
2026
|
frt_is_seek(stde->frq_in, last_frq_ptr);
|
2039
2027
|
stde->seek_prox(stde, last_prx_ptr);
|
2040
2028
|
|
2041
|
-
stde->doc_num =
|
2029
|
+
stde->doc_num = last_skip_doc_num;
|
2042
2030
|
stde->count += num_skipped;
|
2043
2031
|
}
|
2044
2032
|
}
|
@@ -2432,7 +2420,7 @@ static int mtdpe_freq(FrtTermDocEnum *tde) {
|
|
2432
2420
|
static bool mtdpe_next(FrtTermDocEnum *tde) {
|
2433
2421
|
FrtTermDocEnum *sub_tde;
|
2434
2422
|
int pos = 0, freq = 0;
|
2435
|
-
int
|
2423
|
+
int doc_num;
|
2436
2424
|
MultipleTermDocPosEnum *mtdpe = MTDPE(tde);
|
2437
2425
|
|
2438
2426
|
if (0 == mtdpe->pq->size) {
|
@@ -2440,7 +2428,7 @@ static bool mtdpe_next(FrtTermDocEnum *tde) {
|
|
2440
2428
|
}
|
2441
2429
|
|
2442
2430
|
sub_tde = (FrtTermDocEnum *)frt_pq_top(mtdpe->pq);
|
2443
|
-
|
2431
|
+
doc_num = sub_tde->doc_num(sub_tde);
|
2444
2432
|
|
2445
2433
|
do {
|
2446
2434
|
freq += sub_tde->freq(sub_tde);
|
@@ -2463,13 +2451,13 @@ static bool mtdpe_next(FrtTermDocEnum *tde) {
|
|
2463
2451
|
sub_tde->close(sub_tde);
|
2464
2452
|
}
|
2465
2453
|
sub_tde = (FrtTermDocEnum *)frt_pq_top(mtdpe->pq);
|
2466
|
-
} while ((mtdpe->pq->size > 0) && (sub_tde->doc_num(sub_tde) ==
|
2454
|
+
} while ((mtdpe->pq->size > 0) && (sub_tde->doc_num(sub_tde) == doc_num));
|
2467
2455
|
|
2468
2456
|
qsort(mtdpe->pos_queue, freq, sizeof(int), &frt_icmp_risky);
|
2469
2457
|
|
2470
2458
|
mtdpe->pos_queue_index = 0;
|
2471
2459
|
mtdpe->freq = freq;
|
2472
|
-
mtdpe->doc_num =
|
2460
|
+
mtdpe->doc_num = doc_num;
|
2473
2461
|
|
2474
2462
|
return true;
|
2475
2463
|
}
|
@@ -2849,7 +2837,7 @@ frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num) {
|
|
2849
2837
|
}
|
2850
2838
|
if (!norms) {
|
2851
2839
|
if (NULL == ir->fake_norms) {
|
2852
|
-
ir->fake_norms = FRT_ALLOC_AND_ZERO_N(frt_uchar, ir->
|
2840
|
+
ir->fake_norms = FRT_ALLOC_AND_ZERO_N(frt_uchar, ir->max_doc_num(ir));
|
2853
2841
|
}
|
2854
2842
|
norms = ir->fake_norms;
|
2855
2843
|
}
|
@@ -2866,7 +2854,7 @@ frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf) {
|
|
2866
2854
|
if (field_num >= 0) {
|
2867
2855
|
ir->get_norms_into(ir, field_num, buf);
|
2868
2856
|
} else {
|
2869
|
-
memset(buf, 0, ir->
|
2857
|
+
memset(buf, 0, ir->max_doc_num(ir));
|
2870
2858
|
}
|
2871
2859
|
return buf;
|
2872
2860
|
}
|
@@ -2880,7 +2868,7 @@ void frt_ir_undelete_all(FrtIndexReader *ir) {
|
|
2880
2868
|
}
|
2881
2869
|
|
2882
2870
|
void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num) {
|
2883
|
-
if (doc_num >= 0 && doc_num < ir->
|
2871
|
+
if (doc_num >= 0 && doc_num < ir->max_doc_num(ir)) {
|
2884
2872
|
pthread_mutex_lock(&ir->mutex);
|
2885
2873
|
ir->acquire_write_lock(ir);
|
2886
2874
|
ir->delete_doc_i(ir, doc_num);
|
@@ -3252,7 +3240,7 @@ static int sr_num_docs(FrtIndexReader *ir) {
|
|
3252
3240
|
return num_docs;
|
3253
3241
|
}
|
3254
3242
|
|
3255
|
-
static int
|
3243
|
+
static int sr_max_doc_num(FrtIndexReader *ir) {
|
3256
3244
|
return SR(ir)->fr->size;
|
3257
3245
|
}
|
3258
3246
|
|
@@ -3383,7 +3371,7 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr) {
|
|
3383
3371
|
char *sr_segment = sr->si->name;
|
3384
3372
|
|
3385
3373
|
ir->num_docs = &sr_num_docs;
|
3386
|
-
ir->
|
3374
|
+
ir->max_doc_num = &sr_max_doc_num;
|
3387
3375
|
ir->get_doc = &sr_get_doc;
|
3388
3376
|
ir->get_lazy_doc = &sr_get_lazy_doc;
|
3389
3377
|
ir->get_norms = &sr_get_norms;
|
@@ -3500,8 +3488,8 @@ static int mr_num_docs(FrtIndexReader *ir) {
|
|
3500
3488
|
return num_docs;
|
3501
3489
|
}
|
3502
3490
|
|
3503
|
-
static int
|
3504
|
-
return MR(ir)->
|
3491
|
+
static int mr_max_doc_num(FrtIndexReader *ir) {
|
3492
|
+
return MR(ir)->max_doc_num;
|
3505
3493
|
}
|
3506
3494
|
|
3507
3495
|
#define GET_READER()\
|
@@ -3535,7 +3523,7 @@ static frt_uchar *mr_get_norms(FrtIndexReader *ir, int field_num) {
|
|
3535
3523
|
int i;
|
3536
3524
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
3537
3525
|
|
3538
|
-
bytes = FRT_ALLOC_AND_ZERO_N(frt_uchar, MR(ir)->
|
3526
|
+
bytes = FRT_ALLOC_AND_ZERO_N(frt_uchar, MR(ir)->max_doc_num);
|
3539
3527
|
|
3540
3528
|
for (i = 0; i < mr_reader_cnt; i++) {
|
3541
3529
|
int fnum = frt_mr_get_field_num(MR(ir), i, field_num);
|
@@ -3557,7 +3545,7 @@ static frt_uchar *mr_get_norms_into(FrtIndexReader *ir, int field_num, frt_uchar
|
|
3557
3545
|
pthread_mutex_lock(&ir->mutex);
|
3558
3546
|
bytes = (frt_uchar *)frt_h_get_int(MR(ir)->norms_cache, field_num);
|
3559
3547
|
if (NULL != bytes) {
|
3560
|
-
memcpy(buf, bytes, MR(ir)->
|
3548
|
+
memcpy(buf, bytes, MR(ir)->max_doc_num);
|
3561
3549
|
} else {
|
3562
3550
|
int i;
|
3563
3551
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
@@ -3706,7 +3694,7 @@ FrtMultiReader *frt_mr_init(FrtMultiReader *mr, FrtIndexReader **sub_readers, co
|
|
3706
3694
|
|
3707
3695
|
mr->sub_readers = sub_readers;
|
3708
3696
|
mr->r_cnt = r_cnt;
|
3709
|
-
mr->
|
3697
|
+
mr->max_doc_num = 0;
|
3710
3698
|
mr->num_docs_cache = -1;
|
3711
3699
|
mr->has_deletions = false;
|
3712
3700
|
mr->starts = FRT_ALLOC_N(int, (r_cnt+1));
|
@@ -3714,18 +3702,18 @@ FrtMultiReader *frt_mr_init(FrtMultiReader *mr, FrtIndexReader **sub_readers, co
|
|
3714
3702
|
for (i = 0; i < r_cnt; i++) {
|
3715
3703
|
FrtIndexReader *sub_reader = sub_readers[i];
|
3716
3704
|
FRT_REF(sub_reader);
|
3717
|
-
mr->starts[i] = mr->
|
3718
|
-
mr->
|
3705
|
+
mr->starts[i] = mr->max_doc_num;
|
3706
|
+
mr->max_doc_num += sub_reader->max_doc_num(sub_reader); /* compute max_docs */
|
3719
3707
|
|
3720
3708
|
if (sub_reader->has_deletions(sub_reader)) {
|
3721
3709
|
mr->has_deletions = true;
|
3722
3710
|
}
|
3723
3711
|
}
|
3724
|
-
mr->starts[r_cnt] = mr->
|
3712
|
+
mr->starts[r_cnt] = mr->max_doc_num;
|
3725
3713
|
mr->norms_cache = frt_h_new_int(&free);
|
3726
3714
|
|
3727
3715
|
ir->num_docs = &mr_num_docs;
|
3728
|
-
ir->
|
3716
|
+
ir->max_doc_num = &mr_max_doc_num;
|
3729
3717
|
ir->get_doc = &mr_get_doc;
|
3730
3718
|
ir->get_lazy_doc = &mr_get_lazy_doc;
|
3731
3719
|
ir->get_norms = &mr_get_norms;
|
@@ -3980,14 +3968,14 @@ typedef struct SkipBuffer {
|
|
3980
3968
|
FrtOutStream *buf;
|
3981
3969
|
FrtOutStream *frq_out;
|
3982
3970
|
FrtOutStream *prx_out;
|
3983
|
-
int
|
3984
|
-
frt_off_t
|
3985
|
-
frt_off_t
|
3971
|
+
int last_doc_num;
|
3972
|
+
frt_off_t last_frq_ptr;
|
3973
|
+
frt_off_t last_prx_ptr;
|
3986
3974
|
} SkipBuffer;
|
3987
3975
|
|
3988
3976
|
static void skip_buf_reset(SkipBuffer *skip_buf) {
|
3989
3977
|
frt_ramo_reset(skip_buf->buf);
|
3990
|
-
skip_buf->
|
3978
|
+
skip_buf->last_doc_num = 0;
|
3991
3979
|
skip_buf->last_frq_ptr = frt_os_pos(skip_buf->frq_out);
|
3992
3980
|
skip_buf->last_prx_ptr = frt_os_pos(skip_buf->prx_out);
|
3993
3981
|
}
|
@@ -4000,15 +3988,15 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out) {
|
|
4000
3988
|
return skip_buf;
|
4001
3989
|
}
|
4002
3990
|
|
4003
|
-
static void skip_buf_add(SkipBuffer *skip_buf, int
|
3991
|
+
static void skip_buf_add(SkipBuffer *skip_buf, int doc_num) {
|
4004
3992
|
frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
|
4005
3993
|
frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
|
4006
3994
|
|
4007
|
-
frt_os_write_vint(skip_buf->buf,
|
3995
|
+
frt_os_write_vint(skip_buf->buf, doc_num - skip_buf->last_doc_num);
|
4008
3996
|
frt_os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
|
4009
3997
|
frt_os_write_vint(skip_buf->buf, prx_ptr - skip_buf->last_prx_ptr);
|
4010
3998
|
|
4011
|
-
skip_buf->
|
3999
|
+
skip_buf->last_doc_num = doc_num;
|
4012
4000
|
skip_buf->last_frq_ptr = frq_ptr;
|
4013
4001
|
skip_buf->last_prx_ptr = prx_ptr;
|
4014
4002
|
}
|
@@ -4069,7 +4057,7 @@ static void dw_flush_streams(FrtDocWriter *dw) {
|
|
4069
4057
|
}
|
4070
4058
|
|
4071
4059
|
static void dw_flush(FrtDocWriter *dw) {
|
4072
|
-
int i, j, last_doc,
|
4060
|
+
int i, j, last_doc, doc_num, doc_freq, last_pos, posting_count;
|
4073
4061
|
int skip_interval = dw->skip_interval;
|
4074
4062
|
FrtFieldInfos *fis = dw->fis;
|
4075
4063
|
const int fields_count = fis->size;
|
@@ -4117,15 +4105,11 @@ static void dw_flush(FrtDocWriter *dw) {
|
|
4117
4105
|
skip_buf_add(skip_buf, last_doc);
|
4118
4106
|
}
|
4119
4107
|
|
4120
|
-
|
4108
|
+
doc_num = (p->doc_num - last_doc);
|
4121
4109
|
last_doc = p->doc_num;
|
4122
4110
|
|
4123
|
-
|
4124
|
-
|
4125
|
-
} else {
|
4126
|
-
frt_os_write_vint(frq_out, doc_code);
|
4127
|
-
frt_os_write_vint(frq_out, p->freq);
|
4128
|
-
}
|
4111
|
+
frt_os_write_vint(frq_out, doc_num);
|
4112
|
+
frt_os_write_vint(frq_out, p->freq);
|
4129
4113
|
|
4130
4114
|
last_pos = 0;
|
4131
4115
|
for (occ = p->first_occ; NULL != occ; occ = occ->next) {
|
@@ -4616,8 +4600,8 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
4616
4600
|
|
4617
4601
|
if (map) {
|
4618
4602
|
int i;
|
4619
|
-
const int
|
4620
|
-
for (i = 0; i <
|
4603
|
+
const int max_doc_num = sr_max_doc_num(IR(sr));
|
4604
|
+
for (i = 0; i < max_doc_num; i++) {
|
4621
4605
|
int j, data_len = 0;
|
4622
4606
|
const int field_cnt = frt_is_read_vint(fdt_in);
|
4623
4607
|
int tv_cnt;
|
@@ -4801,7 +4785,7 @@ static void iw_add_segment(FrtIndexWriter *iw, FrtSegmentReader *sr) {
|
|
4801
4785
|
const int fis_size = sub_fis->size;
|
4802
4786
|
bool must_map_fields = false;
|
4803
4787
|
|
4804
|
-
si->doc_cnt = IR(sr)->
|
4788
|
+
si->doc_cnt = IR(sr)->max_doc_num(IR(sr));
|
4805
4789
|
/* Merge FrtFieldInfos */
|
4806
4790
|
for (j = 0; j < fis_size; j++) {
|
4807
4791
|
FrtFieldInfo *fi = sub_fis->fields[j];
|
@@ -297,7 +297,7 @@ struct FrtSegmentTermDocEnum {
|
|
297
297
|
int num_skips;
|
298
298
|
int skip_interval;
|
299
299
|
int skip_count;
|
300
|
-
int
|
300
|
+
int skip_doc_num;
|
301
301
|
int prx_cnt;
|
302
302
|
int position;
|
303
303
|
frt_off_t frq_ptr;
|
@@ -507,7 +507,7 @@ typedef enum {
|
|
507
507
|
struct FrtIndexReader {
|
508
508
|
int type;
|
509
509
|
int (*num_docs)(FrtIndexReader *ir);
|
510
|
-
int (*
|
510
|
+
int (*max_doc_num)(FrtIndexReader *ir);
|
511
511
|
FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
|
512
512
|
FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
|
513
513
|
frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
|
@@ -595,7 +595,7 @@ extern FrtSegmentReader *frt_sr_alloc();
|
|
595
595
|
|
596
596
|
struct FrtMultiReader {
|
597
597
|
FrtIndexReader ir;
|
598
|
-
int
|
598
|
+
int max_doc_num;
|
599
599
|
int num_docs_cache;
|
600
600
|
int r_cnt;
|
601
601
|
int *starts;
|
@@ -82,7 +82,7 @@ static bool dssc_advance_after_current(FrtScorer *self) {
|
|
82
82
|
/* repeat until minimum number of matches is found */
|
83
83
|
while (true) {
|
84
84
|
FrtScorer *top = (FrtScorer *)frt_pq_top(scorer_queue);
|
85
|
-
self->
|
85
|
+
self->doc_num = top->doc_num;
|
86
86
|
dssc->cum_score = top->score(top);
|
87
87
|
dssc->num_matches = 1;
|
88
88
|
/* Until all sub-scorers are after self->doc */
|
@@ -103,7 +103,7 @@ static bool dssc_advance_after_current(FrtScorer *self) {
|
|
103
103
|
}
|
104
104
|
}
|
105
105
|
top = (FrtScorer *)frt_pq_top(scorer_queue);
|
106
|
-
if (top->
|
106
|
+
if (top->doc_num != self->doc_num) {
|
107
107
|
/* All remaining subscorers are after self->doc */
|
108
108
|
break;
|
109
109
|
} else {
|
@@ -144,12 +144,12 @@ static bool dssc_skip_to(FrtScorer *self, int doc_num) {
|
|
144
144
|
if (scorer_queue->size < dssc->min_num_matches) {
|
145
145
|
return false;
|
146
146
|
}
|
147
|
-
if (doc_num <= self->
|
148
|
-
doc_num = self->
|
147
|
+
if (doc_num <= self->doc_num) {
|
148
|
+
doc_num = self->doc_num + 1;
|
149
149
|
}
|
150
150
|
while (true) {
|
151
151
|
FrtScorer *top = (FrtScorer *)frt_pq_top(scorer_queue);
|
152
|
-
if (top->
|
152
|
+
if (top->doc_num >= doc_num) {
|
153
153
|
return dssc_advance_after_current(self);
|
154
154
|
} else if (top->skip_to(top, doc_num)) {
|
155
155
|
frt_pq_down(scorer_queue);
|
@@ -193,7 +193,7 @@ static FrtScorer *disjunction_sum_scorer_new(FrtScorer **sub_scorers, int ss_cnt
|
|
193
193
|
DSSc(self)->ss_cnt = ss_cnt;
|
194
194
|
|
195
195
|
/* The document number of the current match */
|
196
|
-
self->
|
196
|
+
self->doc_num = -1;
|
197
197
|
DSSc(self)->cum_score = -1.0;
|
198
198
|
|
199
199
|
/* The number of subscorers that provide the current match. */
|
@@ -246,15 +246,15 @@ static FrtScorer *counting_disjunction_sum_scorer_new(
|
|
246
246
|
#define CSc(scorer) ((ConjunctionScorer *)(scorer))
|
247
247
|
|
248
248
|
typedef struct ConjunctionScorer {
|
249
|
-
FrtScorer
|
250
|
-
bool
|
251
|
-
bool
|
252
|
-
float
|
253
|
-
FrtScorer
|
254
|
-
int
|
255
|
-
int
|
256
|
-
Coordinator
|
257
|
-
int
|
249
|
+
FrtScorer super;
|
250
|
+
bool first_time : 1;
|
251
|
+
bool more : 1;
|
252
|
+
float coord;
|
253
|
+
FrtScorer **sub_scorers;
|
254
|
+
int ss_cnt;
|
255
|
+
int first_idx;
|
256
|
+
Coordinator *coordinator;
|
257
|
+
int last_scored_doc_num;
|
258
258
|
} ConjunctionScorer;
|
259
259
|
|
260
260
|
static void csc_sort_scorers(ConjunctionScorer *csc) {
|
@@ -263,8 +263,8 @@ static void csc_sort_scorers(ConjunctionScorer *csc) {
|
|
263
263
|
for (i = 1; i < csc->ss_cnt; i++) {
|
264
264
|
previous = current;
|
265
265
|
current = csc->sub_scorers[i];
|
266
|
-
if (previous->
|
267
|
-
if (!current->skip_to(current, previous->
|
266
|
+
if (previous->doc_num > current->doc_num) {
|
267
|
+
if (!current->skip_to(current, previous->doc_num)) {
|
268
268
|
csc->more = false;
|
269
269
|
return;
|
270
270
|
}
|
@@ -322,15 +322,15 @@ static bool csc_do_next(FrtScorer *self) {
|
|
322
322
|
FrtScorer *last_sc = csc->sub_scorers[FRT_PREV_NUM(first_idx, sub_sc_cnt)];
|
323
323
|
|
324
324
|
/* skip to doc with all clauses */
|
325
|
-
while (csc->more && (first_sc->
|
325
|
+
while (csc->more && (first_sc->doc_num < last_sc->doc_num)) {
|
326
326
|
/* skip first upto last */
|
327
|
-
csc->more = first_sc->skip_to(first_sc, last_sc->
|
327
|
+
csc->more = first_sc->skip_to(first_sc, last_sc->doc_num);
|
328
328
|
/* move first to last */
|
329
329
|
last_sc = first_sc;
|
330
330
|
first_idx = FRT_NEXT_NUM(first_idx, sub_sc_cnt);
|
331
331
|
first_sc = csc->sub_scorers[first_idx];
|
332
332
|
}
|
333
|
-
self->
|
333
|
+
self->doc_num = first_sc->doc_num;
|
334
334
|
csc->first_idx = first_idx;
|
335
335
|
return csc->more;
|
336
336
|
}
|
@@ -404,9 +404,9 @@ static FrtScorer *conjunction_scorer_new(FrtSimilarity *similarity) {
|
|
404
404
|
static float ccsc_score(FrtScorer *self) {
|
405
405
|
ConjunctionScorer *csc = CSc(self);
|
406
406
|
|
407
|
-
int
|
408
|
-
if ((
|
409
|
-
csc->
|
407
|
+
int doc_num;
|
408
|
+
if ((doc_num = self->doc_num) > csc->last_scored_doc_num) {
|
409
|
+
csc->last_scored_doc_num = doc_num;
|
410
410
|
csc->coordinator->num_matches += csc->ss_cnt;
|
411
411
|
}
|
412
412
|
|
@@ -418,7 +418,7 @@ static FrtScorer *counting_conjunction_sum_scorer_new(
|
|
418
418
|
FrtScorer *self = conjunction_scorer_new(frt_sim_create_default());
|
419
419
|
ConjunctionScorer *csc = CSc(self);
|
420
420
|
csc->coordinator = coordinator;
|
421
|
-
csc->
|
421
|
+
csc->last_scored_doc_num = -1;
|
422
422
|
csc->sub_scorers = FRT_ALLOC_N(FrtScorer *, ss_cnt);
|
423
423
|
memcpy(csc->sub_scorers, sub_scorers, sizeof(FrtScorer *) * ss_cnt);
|
424
424
|
csc->ss_cnt = ss_cnt;
|
@@ -449,7 +449,7 @@ static float smsc_score(FrtScorer *self) {
|
|
449
449
|
static bool smsc_next(FrtScorer *self) {
|
450
450
|
FrtScorer *scorer = SMSc(self)->scorer;
|
451
451
|
if (scorer->next(scorer)) {
|
452
|
-
self->
|
452
|
+
self->doc_num = scorer->doc_num;
|
453
453
|
return true;
|
454
454
|
}
|
455
455
|
return false;
|
@@ -458,7 +458,7 @@ static bool smsc_next(FrtScorer *self) {
|
|
458
458
|
static bool smsc_skip_to(FrtScorer *self, int doc_num) {
|
459
459
|
FrtScorer *scorer = SMSc(self)->scorer;
|
460
460
|
if (scorer->skip_to(scorer, doc_num)) {
|
461
|
-
self->
|
461
|
+
self->doc_num = scorer->doc_num;
|
462
462
|
return true;
|
463
463
|
}
|
464
464
|
return false;
|
@@ -505,24 +505,24 @@ static float rossc_score(FrtScorer *self) {
|
|
505
505
|
ReqOptSumScorer *rossc = ROSSc(self);
|
506
506
|
FrtScorer *req_scorer = rossc->req_scorer;
|
507
507
|
FrtScorer *opt_scorer = rossc->opt_scorer;
|
508
|
-
int
|
508
|
+
int cur_doc_num = req_scorer->doc_num;
|
509
509
|
float req_score = req_scorer->score(req_scorer);
|
510
510
|
|
511
511
|
if (rossc->first_time_opt) {
|
512
512
|
rossc->first_time_opt = false;
|
513
|
-
if (! opt_scorer->skip_to(opt_scorer,
|
513
|
+
if (! opt_scorer->skip_to(opt_scorer, cur_doc_num)) {
|
514
514
|
FRT_SCORER_NULLIFY(rossc->opt_scorer);
|
515
515
|
return req_score;
|
516
516
|
}
|
517
517
|
} else if (opt_scorer == NULL) {
|
518
518
|
return req_score;
|
519
|
-
} else if ((opt_scorer->
|
520
|
-
&& ! opt_scorer->skip_to(opt_scorer,
|
519
|
+
} else if ((opt_scorer->doc_num < cur_doc_num)
|
520
|
+
&& ! opt_scorer->skip_to(opt_scorer, cur_doc_num)) {
|
521
521
|
FRT_SCORER_NULLIFY(rossc->opt_scorer);
|
522
522
|
return req_score;
|
523
523
|
}
|
524
524
|
/* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
|
525
|
-
return (opt_scorer->
|
525
|
+
return (opt_scorer->doc_num == cur_doc_num)
|
526
526
|
? req_score + opt_scorer->score(opt_scorer)
|
527
527
|
: req_score;
|
528
528
|
}
|
@@ -530,7 +530,7 @@ static float rossc_score(FrtScorer *self) {
|
|
530
530
|
static bool rossc_next(FrtScorer *self) {
|
531
531
|
FrtScorer *req_scorer = ROSSc(self)->req_scorer;
|
532
532
|
if (req_scorer->next(req_scorer)) {
|
533
|
-
self->
|
533
|
+
self->doc_num = req_scorer->doc_num;
|
534
534
|
return true;
|
535
535
|
}
|
536
536
|
return false;
|
@@ -539,7 +539,7 @@ static bool rossc_next(FrtScorer *self) {
|
|
539
539
|
static bool rossc_skip_to(FrtScorer *self, int doc_num) {
|
540
540
|
FrtScorer *req_scorer = ROSSc(self)->req_scorer;
|
541
541
|
if (req_scorer->skip_to(req_scorer, doc_num)) {
|
542
|
-
self->
|
542
|
+
self->doc_num = req_scorer->doc_num;
|
543
543
|
return true;
|
544
544
|
}
|
545
545
|
return false;
|
@@ -598,25 +598,25 @@ typedef struct ReqExclScorer {
|
|
598
598
|
static bool rxsc_to_non_excluded(FrtScorer *self) {
|
599
599
|
FrtScorer *req_scorer = RXSc(self)->req_scorer;
|
600
600
|
FrtScorer *excl_scorer = RXSc(self)->excl_scorer;
|
601
|
-
int
|
601
|
+
int excl_doc_num = excl_scorer->doc_num, req_doc_num;
|
602
602
|
|
603
603
|
do {
|
604
604
|
/* may be excluded */
|
605
|
-
|
606
|
-
if (
|
605
|
+
req_doc_num = req_scorer->doc_num;
|
606
|
+
if (req_doc_num < excl_doc_num) {
|
607
607
|
/* req_scorer advanced to before excl_scorer, ie. not excluded */
|
608
|
-
self->
|
608
|
+
self->doc_num = req_doc_num;
|
609
609
|
return true;
|
610
|
-
} else if (
|
611
|
-
if (! excl_scorer->skip_to(excl_scorer,
|
610
|
+
} else if (req_doc_num > excl_doc_num) {
|
611
|
+
if (! excl_scorer->skip_to(excl_scorer, req_doc_num)) {
|
612
612
|
/* emptied, no more exclusions */
|
613
613
|
FRT_SCORER_NULLIFY(RXSc(self)->excl_scorer);
|
614
|
-
self->
|
614
|
+
self->doc_num = req_doc_num;
|
615
615
|
return true;
|
616
616
|
}
|
617
|
-
|
618
|
-
if (
|
619
|
-
self->
|
617
|
+
excl_doc_num = excl_scorer->doc_num;
|
618
|
+
if (excl_doc_num > req_doc_num) {
|
619
|
+
self->doc_num = req_doc_num;
|
620
620
|
return true; /* not excluded */
|
621
621
|
}
|
622
622
|
}
|
@@ -648,7 +648,7 @@ static bool rxsc_next(FrtScorer *self) {
|
|
648
648
|
return false;
|
649
649
|
}
|
650
650
|
if (excl_scorer == NULL) {
|
651
|
-
self->
|
651
|
+
self->doc_num = req_scorer->doc_num;
|
652
652
|
/* req_scorer->next() already returned true */
|
653
653
|
return true;
|
654
654
|
}
|
@@ -673,7 +673,7 @@ static bool rxsc_skip_to(FrtScorer *self, int doc_num) {
|
|
673
673
|
}
|
674
674
|
if (excl_scorer == NULL) {
|
675
675
|
if (req_scorer->skip_to(req_scorer, doc_num)) {
|
676
|
-
self->
|
676
|
+
self->doc_num = req_scorer->doc_num;
|
677
677
|
return true;
|
678
678
|
}
|
679
679
|
return false;
|
@@ -697,7 +697,7 @@ static FrtExplanation *rxsc_explain(FrtScorer *self, int doc_num) {
|
|
697
697
|
FrtExplanation *e;
|
698
698
|
|
699
699
|
if (excl_scorer->skip_to(excl_scorer, doc_num)
|
700
|
-
&& excl_scorer->
|
700
|
+
&& excl_scorer->doc_num == doc_num) {
|
701
701
|
e = frt_expl_new(0.0, "excluded:");
|
702
702
|
} else {
|
703
703
|
e = frt_expl_new(0.0, "not excluded:");
|
@@ -927,7 +927,7 @@ static bool bsc_next(FrtScorer *self) {
|
|
927
927
|
cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
|
928
928
|
}
|
929
929
|
if (cnt_sum_sc->next(cnt_sum_sc)) {
|
930
|
-
self->
|
930
|
+
self->doc_num = cnt_sum_sc->doc_num;
|
931
931
|
return true;
|
932
932
|
} else {
|
933
933
|
return false;
|
@@ -941,7 +941,7 @@ static bool bsc_skip_to(FrtScorer *self, int doc_num) {
|
|
941
941
|
cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
|
942
942
|
}
|
943
943
|
if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
|
944
|
-
self->
|
944
|
+
self->doc_num = cnt_sum_sc->doc_num;
|
945
945
|
return true;
|
946
946
|
} else {
|
947
947
|
return false;
|
@@ -21,11 +21,11 @@ static float cssc_score(FrtScorer *self) {
|
|
21
21
|
}
|
22
22
|
|
23
23
|
static bool cssc_next(FrtScorer *self) {
|
24
|
-
return ((self->
|
24
|
+
return ((self->doc_num = frt_bv_scan_next(CScSc(self)->bv)) >= 0);
|
25
25
|
}
|
26
26
|
|
27
27
|
static bool cssc_skip_to(FrtScorer *self, int doc_num) {
|
28
|
-
return ((self->
|
28
|
+
return ((self->doc_num = frt_bv_scan_next_from(CScSc(self)->bv, doc_num)) >= 0);
|
29
29
|
}
|
30
30
|
|
31
31
|
static FrtExplanation *cssc_explain(FrtScorer *self, int doc_num) {
|