isomorfeus-ferret 0.17.3 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
- data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
- data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
- data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
- data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/index/index.rb +7 -7
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +12 -6
@@ -19,7 +19,7 @@
|
|
19
19
|
|
20
20
|
typedef struct TermScorer {
|
21
21
|
FrtScorer super;
|
22
|
-
int
|
22
|
+
int doc_nums[TDE_READ_SIZE];
|
23
23
|
int freqs[TDE_READ_SIZE];
|
24
24
|
int pointer;
|
25
25
|
int pointer_max;
|
@@ -42,7 +42,7 @@ static float tsc_score(FrtScorer *self) {
|
|
42
42
|
score = frt_sim_tf(self->similarity, (float)freq) * ts->weight_value;
|
43
43
|
}
|
44
44
|
/* normalize for field */
|
45
|
-
score *= frt_sim_decode_norm(self->similarity, ts->norms[self->
|
45
|
+
score *= frt_sim_decode_norm(self->similarity, ts->norms[self->doc_num]);
|
46
46
|
return score;
|
47
47
|
}
|
48
48
|
|
@@ -52,14 +52,14 @@ static bool tsc_next(FrtScorer *self) {
|
|
52
52
|
ts->pointer++;
|
53
53
|
if (ts->pointer >= ts->pointer_max) {
|
54
54
|
/* refill buffer */
|
55
|
-
ts->pointer_max = ts->tde->read(ts->tde, ts->
|
55
|
+
ts->pointer_max = ts->tde->read(ts->tde, ts->doc_nums, ts->freqs, TDE_READ_SIZE);
|
56
56
|
if (ts->pointer_max != 0) {
|
57
57
|
ts->pointer = 0;
|
58
58
|
} else {
|
59
59
|
return false;
|
60
60
|
}
|
61
61
|
}
|
62
|
-
self->
|
62
|
+
self->doc_num = ts->doc_nums[ts->pointer];
|
63
63
|
return true;
|
64
64
|
}
|
65
65
|
|
@@ -69,8 +69,8 @@ static bool tsc_skip_to(FrtScorer *self, int doc_num) {
|
|
69
69
|
|
70
70
|
/* first scan in cache */
|
71
71
|
while (++(ts->pointer) < ts->pointer_max) {
|
72
|
-
if (ts->
|
73
|
-
self->
|
72
|
+
if (ts->doc_nums[ts->pointer] >= doc_num) {
|
73
|
+
self->doc_num = ts->doc_nums[ts->pointer];
|
74
74
|
return true;
|
75
75
|
}
|
76
76
|
}
|
@@ -79,7 +79,7 @@ static bool tsc_skip_to(FrtScorer *self, int doc_num) {
|
|
79
79
|
if (tde->skip_to(tde, doc_num)) {
|
80
80
|
ts->pointer_max = 1;
|
81
81
|
ts->pointer = 0;
|
82
|
-
ts->
|
82
|
+
ts->doc_nums[0] = self->doc_num = tde->doc_num(tde);
|
83
83
|
ts->freqs[0] = tde->freq(tde);
|
84
84
|
return true;
|
85
85
|
} else {
|
@@ -93,7 +93,7 @@ static FrtExplanation *tsc_explain(FrtScorer *self, int doc_num) {
|
|
93
93
|
int tf = 0;
|
94
94
|
|
95
95
|
tsc_skip_to(self, doc_num);
|
96
|
-
if (self->
|
96
|
+
if (self->doc_num == doc_num) {
|
97
97
|
tf = ts->freqs[ts->pointer];
|
98
98
|
}
|
99
99
|
return frt_expl_new(frt_sim_tf(self->similarity, (float)tf),
|
@@ -208,7 +208,7 @@ static FrtWeight *tw_new(FrtQuery *query, FrtSearcher *searcher) {
|
|
208
208
|
searcher->doc_freq(searcher,
|
209
209
|
TQ(query)->field,
|
210
210
|
TQ(query)->term),
|
211
|
-
searcher->
|
211
|
+
searcher->max_doc_num(searcher)); /* compute idf */
|
212
212
|
|
213
213
|
return self;
|
214
214
|
}
|
@@ -76,7 +76,7 @@ char *frt_expl_to_html(FrtExplanation *expl) {
|
|
76
76
|
|
77
77
|
static bool hit_lt(FrtHit *hit1, FrtHit *hit2) {
|
78
78
|
if (hit1->score == hit2->score) {
|
79
|
-
return hit1->
|
79
|
+
return hit1->doc_num > hit2->doc_num;
|
80
80
|
} else {
|
81
81
|
return hit1->score < hit2->score;
|
82
82
|
}
|
@@ -188,7 +188,7 @@ char *frt_td_to_s(FrtTopDocs *td) {
|
|
188
188
|
td->total_hits);
|
189
189
|
for (i = 0; i < td->size; i++) {
|
190
190
|
hit = td->hits[i];
|
191
|
-
frt_estrcat(buffer, frt_strfmt("\t%d:%f\n", hit->
|
191
|
+
frt_estrcat(buffer, frt_strfmt("\t%d:%f\n", hit->doc_num, hit->score));
|
192
192
|
}
|
193
193
|
return buffer;
|
194
194
|
}
|
@@ -432,11 +432,11 @@ FrtScorer *frt_scorer_create(size_t size, FrtSimilarity *similarity) {
|
|
432
432
|
}
|
433
433
|
|
434
434
|
bool frt_scorer_doc_less_than(const FrtScorer *s1, const FrtScorer *s2) {
|
435
|
-
return s1->
|
435
|
+
return s1->doc_num < s2->doc_num;
|
436
436
|
}
|
437
437
|
|
438
438
|
int frt_scorer_doc_cmp(const void *p1, const void *p2) {
|
439
|
-
return (*(FrtScorer **)p1)->
|
439
|
+
return (*(FrtScorer **)p1)->doc_num - (*(FrtScorer **)p2)->doc_num;
|
440
440
|
}
|
441
441
|
|
442
442
|
/***************************************************************************
|
@@ -912,9 +912,9 @@ static FrtLazyDoc *isea_get_lazy_doc(FrtSearcher *self, int doc_num) {
|
|
912
912
|
return ir->get_lazy_doc(ir, doc_num);
|
913
913
|
}
|
914
914
|
|
915
|
-
static int
|
915
|
+
static int isea_max_doc_num(FrtSearcher *self) {
|
916
916
|
FrtIndexReader *ir = ISEA(self)->ir;
|
917
|
-
return ir->
|
917
|
+
return ir->max_doc_num(ir);
|
918
918
|
}
|
919
919
|
|
920
920
|
#define IS_FILTERED(bits, post_filter, scorer, searcher) \
|
@@ -976,10 +976,10 @@ static FrtTopDocs *isea_search_w(FrtSearcher *self,
|
|
976
976
|
}
|
977
977
|
|
978
978
|
while (scorer->next(scorer)) {
|
979
|
-
if (bits && !frt_bv_get(bits, scorer->
|
979
|
+
if (bits && !frt_bv_get(bits, scorer->doc_num)) continue;
|
980
980
|
score = scorer->score(scorer);
|
981
981
|
if (post_filter &&
|
982
|
-
!(filter_factor = post_filter->filter_func(scorer->
|
982
|
+
!(filter_factor = post_filter->filter_func(scorer->doc_num,
|
983
983
|
score,
|
984
984
|
self,
|
985
985
|
post_filter->arg))) {
|
@@ -988,7 +988,7 @@ static FrtTopDocs *isea_search_w(FrtSearcher *self,
|
|
988
988
|
total_hits++;
|
989
989
|
if (filter_factor < 1.0f) score *= filter_factor;
|
990
990
|
if (score > max_score) max_score = score;
|
991
|
-
hit.
|
991
|
+
hit.doc_num = scorer->doc_num; hit.score = score;
|
992
992
|
hq_insert(hq, &hit);
|
993
993
|
}
|
994
994
|
scorer->destroy(scorer);
|
@@ -1039,16 +1039,16 @@ static void isea_search_each_w(FrtSearcher *self, FrtWeight *weight, FrtFilter *
|
|
1039
1039
|
|
1040
1040
|
while (scorer->next(scorer)) {
|
1041
1041
|
float score;
|
1042
|
-
if (bits && !frt_bv_get(bits, scorer->
|
1042
|
+
if (bits && !frt_bv_get(bits, scorer->doc_num)) continue;
|
1043
1043
|
score = scorer->score(scorer);
|
1044
1044
|
if (post_filter &&
|
1045
|
-
!(filter_factor = post_filter->filter_func(scorer->
|
1045
|
+
!(filter_factor = post_filter->filter_func(scorer->doc_num,
|
1046
1046
|
score,
|
1047
1047
|
self,
|
1048
1048
|
post_filter->arg))) {
|
1049
1049
|
continue;
|
1050
1050
|
}
|
1051
|
-
fn(self, scorer->
|
1051
|
+
fn(self, scorer->doc_num, filter_factor * score, arg);
|
1052
1052
|
}
|
1053
1053
|
scorer->destroy(scorer);
|
1054
1054
|
}
|
@@ -1076,7 +1076,7 @@ static int isea_search_unscored_w(FrtSearcher *self, FrtWeight *weight, int *buf
|
|
1076
1076
|
if (scorer) {
|
1077
1077
|
if (scorer->skip_to(scorer, offset_docnum)) {
|
1078
1078
|
do {
|
1079
|
-
buf[count++] = scorer->
|
1079
|
+
buf[count++] = scorer->doc_num;
|
1080
1080
|
} while (count < limit && scorer->next(scorer));
|
1081
1081
|
}
|
1082
1082
|
scorer->destroy(scorer);
|
@@ -1142,7 +1142,7 @@ FrtSearcher *frt_isea_init(FrtSearcher *self, FrtIndexReader *ir) {
|
|
1142
1142
|
self->doc_freq = &frt_isea_doc_freq;
|
1143
1143
|
self->get_doc = &isea_get_doc;
|
1144
1144
|
self->get_lazy_doc = &isea_get_lazy_doc;
|
1145
|
-
self->
|
1145
|
+
self->max_doc_num = &isea_max_doc_num;
|
1146
1146
|
self->create_weight = &sea_create_weight;
|
1147
1147
|
self->search = &isea_search;
|
1148
1148
|
self->search_w = &isea_search_w;
|
@@ -1175,7 +1175,7 @@ FrtSearcher *frt_isea_new(FrtIndexReader *ir) {
|
|
1175
1175
|
typedef struct CachedDFSearcher {
|
1176
1176
|
FrtSearcher super;
|
1177
1177
|
FrtHash *df_map;
|
1178
|
-
int
|
1178
|
+
int max_doc_num;
|
1179
1179
|
} CachedDFSearcher;
|
1180
1180
|
|
1181
1181
|
static int cdfsea_doc_freq(FrtSearcher *self, ID field, const char *text) {
|
@@ -1193,9 +1193,9 @@ static FrtDocument *cdfsea_get_doc(FrtSearcher *self, int doc_num) {
|
|
1193
1193
|
return NULL;
|
1194
1194
|
}
|
1195
1195
|
|
1196
|
-
static int
|
1196
|
+
static int cdfsea_max_doc_num(FrtSearcher *self) {
|
1197
1197
|
(void)self;
|
1198
|
-
return CDFSEA(self)->
|
1198
|
+
return CDFSEA(self)->max_doc_num;
|
1199
1199
|
}
|
1200
1200
|
|
1201
1201
|
static FrtWeight *cdfsea_create_weight(FrtSearcher *self, FrtQuery *query) {
|
@@ -1269,16 +1269,16 @@ static void cdfsea_close(FrtSearcher *self) {
|
|
1269
1269
|
free(self);
|
1270
1270
|
}
|
1271
1271
|
|
1272
|
-
static FrtSearcher *cdfsea_new(FrtHash *df_map, int
|
1273
|
-
FrtSearcher *self
|
1272
|
+
static FrtSearcher *cdfsea_new(FrtHash *df_map, int max_doc_num) {
|
1273
|
+
FrtSearcher *self = (FrtSearcher *)FRT_ALLOC(CachedDFSearcher);
|
1274
1274
|
|
1275
1275
|
CDFSEA(self)->df_map = df_map;
|
1276
|
-
CDFSEA(self)->
|
1276
|
+
CDFSEA(self)->max_doc_num = max_doc_num;
|
1277
1277
|
|
1278
1278
|
self->similarity = frt_sim_create_default();
|
1279
1279
|
self->doc_freq = &cdfsea_doc_freq;
|
1280
1280
|
self->get_doc = &cdfsea_get_doc;
|
1281
|
-
self->
|
1281
|
+
self->max_doc_num = &cdfsea_max_doc_num;
|
1282
1282
|
self->create_weight = &cdfsea_create_weight;
|
1283
1283
|
self->search = &cdfsea_search;
|
1284
1284
|
self->search_w = &cdfsea_search_w;
|
@@ -1350,8 +1350,8 @@ static FrtLazyDoc *msea_get_lazy_doc(FrtSearcher *self, int doc_num) {
|
|
1350
1350
|
return s->get_lazy_doc(s, doc_num - msea->starts[i]);
|
1351
1351
|
}
|
1352
1352
|
|
1353
|
-
static int
|
1354
|
-
return MSEA(self)->
|
1353
|
+
static int msea_max_doc_num(FrtSearcher *self) {
|
1354
|
+
return MSEA(self)->max_doc_num;
|
1355
1355
|
}
|
1356
1356
|
|
1357
1357
|
static int *msea_get_doc_freqs(FrtSearcher *self, FrtHashSet *terms) {
|
@@ -1389,7 +1389,7 @@ static FrtWeight *msea_create_weight(FrtSearcher *self, FrtQuery *query) {
|
|
1389
1389
|
frt_hs_destroy(terms);
|
1390
1390
|
free(doc_freqs);
|
1391
1391
|
|
1392
|
-
cdfsea = cdfsea_new(df_map, MSEA(self)->
|
1392
|
+
cdfsea = cdfsea_new(df_map, MSEA(self)->max_doc_num);
|
1393
1393
|
|
1394
1394
|
w = frt_q_weight(rewritten_query, cdfsea);
|
1395
1395
|
frt_q_deref(rewritten_query);
|
@@ -1546,7 +1546,7 @@ static FrtTopDocs *msea_search_w(FrtSearcher *self,
|
|
1546
1546
|
int start = MSEA(self)->starts[i];
|
1547
1547
|
for (j = 0; j < td->size; j++) {
|
1548
1548
|
FrtHit *hit = td->hits[j];
|
1549
|
-
hit->
|
1549
|
+
hit->doc_num += start;
|
1550
1550
|
hq_insert(hq, hit);
|
1551
1551
|
}
|
1552
1552
|
td->size = 0;
|
@@ -1657,25 +1657,25 @@ FrtSearcher *frt_msea_alloc(void) {
|
|
1657
1657
|
}
|
1658
1658
|
|
1659
1659
|
FrtSearcher *frt_msea_init(FrtSearcher *self, FrtSearcher **searchers, int s_cnt) {
|
1660
|
-
int i,
|
1660
|
+
int i, max_doc_num = 0;
|
1661
1661
|
int *starts = FRT_ALLOC_N(int, s_cnt + 1);
|
1662
1662
|
for (i = 0; i < s_cnt; i++) {
|
1663
|
-
starts[i] =
|
1664
|
-
|
1663
|
+
starts[i] = max_doc_num;
|
1664
|
+
max_doc_num += searchers[i]->max_doc_num(searchers[i]);
|
1665
1665
|
FRT_REF(searchers[i]);
|
1666
1666
|
}
|
1667
|
-
starts[i] =
|
1667
|
+
starts[i] = max_doc_num;
|
1668
1668
|
|
1669
1669
|
MSEA(self)->s_cnt = s_cnt;
|
1670
1670
|
MSEA(self)->searchers = searchers;
|
1671
1671
|
MSEA(self)->starts = starts;
|
1672
|
-
MSEA(self)->
|
1672
|
+
MSEA(self)->max_doc_num = max_doc_num;
|
1673
1673
|
self->ref_cnt = 1;
|
1674
1674
|
self->similarity = frt_sim_create_default();
|
1675
1675
|
self->doc_freq = &msea_doc_freq;
|
1676
1676
|
self->get_doc = &msea_get_doc;
|
1677
1677
|
self->get_lazy_doc = &msea_get_lazy_doc;
|
1678
|
-
self->
|
1678
|
+
self->max_doc_num = &msea_max_doc_num;
|
1679
1679
|
self->create_weight = &msea_create_weight;
|
1680
1680
|
self->search = &msea_search;
|
1681
1681
|
self->search_w = &msea_search_w;
|
@@ -68,7 +68,7 @@ extern FrtMatchVector *frt_matchv_compact_with_breaks(FrtMatchVector *self);
|
|
68
68
|
***************************************************************************/
|
69
69
|
|
70
70
|
typedef struct FrtHit {
|
71
|
-
int
|
71
|
+
int doc_num;
|
72
72
|
float score;
|
73
73
|
} FrtHit;
|
74
74
|
|
@@ -504,7 +504,7 @@ struct FrtSpanEnum {
|
|
504
504
|
FrtQuery *query;
|
505
505
|
bool (*next)(FrtSpanEnum *self);
|
506
506
|
bool (*skip_to)(FrtSpanEnum *self, int target_doc);
|
507
|
-
int (*
|
507
|
+
int (*doc_num)(FrtSpanEnum *self);
|
508
508
|
int (*start)(FrtSpanEnum *self);
|
509
509
|
int (*end)(FrtSpanEnum *self);
|
510
510
|
char *(*to_s)(FrtSpanEnum *self);
|
@@ -645,7 +645,7 @@ extern FrtQuery *frt_spanprq_new(ID field, const char *prefix);
|
|
645
645
|
|
646
646
|
struct FrtScorer {
|
647
647
|
FrtSimilarity *similarity;
|
648
|
-
int
|
648
|
+
int doc_num;
|
649
649
|
float (*score)(FrtScorer *self);
|
650
650
|
bool (*next)(FrtScorer *self);
|
651
651
|
bool (*skip_to)(FrtScorer *self, int doc_num);
|
@@ -795,7 +795,7 @@ struct FrtSearcher {
|
|
795
795
|
int (*doc_freq)(FrtSearcher *self, ID field, const char *term);
|
796
796
|
FrtDocument *(*get_doc)(FrtSearcher *self, int doc_num);
|
797
797
|
FrtLazyDoc *(*get_lazy_doc)(FrtSearcher *self, int doc_num);
|
798
|
-
int (*
|
798
|
+
int (*max_doc_num)(FrtSearcher *self);
|
799
799
|
FrtWeight *(*create_weight)(FrtSearcher *self, FrtQuery *query);
|
800
800
|
FrtTopDocs *(*search)(FrtSearcher *self, FrtQuery *query, int first_doc, int num_docs, FrtFilter *filter, FrtSort *sort, FrtPostFilter *post_filter, bool load_fields);
|
801
801
|
FrtTopDocs *(*search_w)(FrtSearcher *self, FrtWeight *weight, int first_doc, int num_docs, FrtFilter *filter, FrtSort *sort, FrtPostFilter *post_filter, bool load_fields);
|
@@ -822,7 +822,7 @@ struct FrtSearcher {
|
|
822
822
|
|
823
823
|
#define frt_searcher_get_doc(s, dn) s->get_doc(s, dn)
|
824
824
|
#define frt_searcher_get_lazy_doc(s, dn) s->get_lazy_doc(s, dn)
|
825
|
-
#define frt_searcher_max_doc(s) s->
|
825
|
+
#define frt_searcher_max_doc(s) s->max_doc_num(s)
|
826
826
|
#define frt_searcher_rewrite(s, q) s->rewrite(s, q)
|
827
827
|
#define frt_searcher_explain(s, q, dn) s->explain(s, q, dn)
|
828
828
|
#define frt_searcher_close(s) s->close(s)
|
@@ -868,7 +868,7 @@ typedef struct FrtMultiSearcher {
|
|
868
868
|
int s_cnt;
|
869
869
|
FrtSearcher **searchers;
|
870
870
|
int *starts;
|
871
|
-
int
|
871
|
+
int max_doc_num;
|
872
872
|
} FrtMultiSearcher;
|
873
873
|
|
874
874
|
extern FrtSearcher *frt_msea_alloc(void);
|
@@ -63,7 +63,7 @@ static float simdef_sloppy_freq(struct FrtSimilarity *s, int distance) {
|
|
63
63
|
|
64
64
|
static float simdef_idf_term(struct FrtSimilarity *s, ID field, char *term, FrtSearcher *searcher) {
|
65
65
|
return s->idf(s, searcher->doc_freq(searcher, field, term),
|
66
|
-
searcher->
|
66
|
+
searcher->max_doc_num(searcher));
|
67
67
|
}
|
68
68
|
|
69
69
|
static float simdef_idf_phrase(struct FrtSimilarity *s, ID field, FrtPhrasePosition *positions, int pp_cnt, FrtSearcher *searcher) {
|
@@ -145,12 +145,12 @@ FrtSortField *FRT_SORT_FIELD_SCORE_REV;
|
|
145
145
|
|
146
146
|
void frt_sort_field_doc_get_val(void *index, FrtHit *hit, FrtComparable *comparable) {
|
147
147
|
(void)index;
|
148
|
-
comparable->val.l = hit->
|
148
|
+
comparable->val.l = hit->doc_num;
|
149
149
|
}
|
150
150
|
|
151
151
|
int frt_sort_field_doc_compare(void *index_ptr, FrtHit *hit1, FrtHit *hit2) {
|
152
|
-
int val1 = hit1->
|
153
|
-
int val2 = hit2->
|
152
|
+
int val1 = hit1->doc_num;
|
153
|
+
int val2 = hit2->doc_num;
|
154
154
|
(void)index_ptr;
|
155
155
|
|
156
156
|
if (val1 > val2) return 1;
|
@@ -174,12 +174,12 @@ FrtSortField *FRT_SORT_FIELD_DOC_REV;
|
|
174
174
|
***************************************************************************/
|
175
175
|
|
176
176
|
static void sf_byte_get_val(void *index, FrtHit *hit, FrtComparable *comparable) {
|
177
|
-
comparable->val.l = ((long *)index)[hit->
|
177
|
+
comparable->val.l = ((long *)index)[hit->doc_num];
|
178
178
|
}
|
179
179
|
|
180
180
|
static int sf_byte_compare(void *index, FrtHit *hit1, FrtHit *hit2) {
|
181
|
-
long val1 = ((long *)index)[hit1->
|
182
|
-
long val2 = ((long *)index)[hit2->
|
181
|
+
long val1 = ((long *)index)[hit1->doc_num];
|
182
|
+
long val2 = ((long *)index)[hit2->doc_num];
|
183
183
|
if (val1 > val2) return 1;
|
184
184
|
else if (val1 < val2) return -1;
|
185
185
|
else return 0;
|
@@ -198,12 +198,12 @@ FrtSortField *frt_sort_field_byte_new(ID field, bool reverse) {
|
|
198
198
|
***************************************************************************/
|
199
199
|
|
200
200
|
static void sf_int_get_val(void *index, FrtHit *hit, FrtComparable *comparable) {
|
201
|
-
comparable->val.l = ((long *)index)[hit->
|
201
|
+
comparable->val.l = ((long *)index)[hit->doc_num];
|
202
202
|
}
|
203
203
|
|
204
204
|
static int sf_int_compare(void *index, FrtHit *hit1, FrtHit *hit2) {
|
205
|
-
long val1 = ((long *)index)[hit1->
|
206
|
-
long val2 = ((long *)index)[hit2->
|
205
|
+
long val1 = ((long *)index)[hit1->doc_num];
|
206
|
+
long val2 = ((long *)index)[hit2->doc_num];
|
207
207
|
if (val1 > val2) return 1;
|
208
208
|
else if (val1 < val2) return -1;
|
209
209
|
else return 0;
|
@@ -222,12 +222,12 @@ FrtSortField *frt_sort_field_int_new(ID field, bool reverse) {
|
|
222
222
|
***************************************************************************/
|
223
223
|
|
224
224
|
static void sf_float_get_val(void *index, FrtHit *hit, FrtComparable *comparable) {
|
225
|
-
comparable->val.f = ((float *)index)[hit->
|
225
|
+
comparable->val.f = ((float *)index)[hit->doc_num];
|
226
226
|
}
|
227
227
|
|
228
228
|
static int sf_float_compare(void *index, FrtHit *hit1, FrtHit *hit2) {
|
229
|
-
float val1 = ((float *)index)[hit1->
|
230
|
-
float val2 = ((float *)index)[hit2->
|
229
|
+
float val1 = ((float *)index)[hit1->doc_num];
|
230
|
+
float val2 = ((float *)index)[hit2->doc_num];
|
231
231
|
if (val1 > val2) return 1;
|
232
232
|
else if (val1 < val2) return -1;
|
233
233
|
else return 0;
|
@@ -248,14 +248,14 @@ FrtSortField *frt_sort_field_float_new(ID field, bool reverse) {
|
|
248
248
|
static void sf_string_get_val(void *index, FrtHit *hit, FrtComparable *comparable) {
|
249
249
|
comparable->val.s
|
250
250
|
= ((FrtStringIndex *)index)->values[
|
251
|
-
((FrtStringIndex *)index)->index[hit->
|
251
|
+
((FrtStringIndex *)index)->index[hit->doc_num]];
|
252
252
|
}
|
253
253
|
|
254
254
|
static int sf_string_compare(void *index, FrtHit *hit1, FrtHit *hit2) {
|
255
255
|
char *s1 = ((FrtStringIndex *)index)->values[
|
256
|
-
((FrtStringIndex *)index)->index[hit1->
|
256
|
+
((FrtStringIndex *)index)->index[hit1->doc_num]];
|
257
257
|
char *s2 = ((FrtStringIndex *)index)->values[
|
258
|
-
((FrtStringIndex *)index)->index[hit2->
|
258
|
+
((FrtStringIndex *)index)->index[hit2->doc_num]];
|
259
259
|
|
260
260
|
if (s1 == NULL) return s2 ? 1 : 0;
|
261
261
|
if (s2 == NULL) return -1;
|
@@ -427,7 +427,7 @@ static bool fshq_lt(Sorter *sorter, FrtHit *hit1, FrtHit *hit2) {
|
|
427
427
|
if (diff != 0) {
|
428
428
|
return diff > 0;
|
429
429
|
} else {
|
430
|
-
return hit1->
|
430
|
+
return hit1->doc_num > hit2->doc_num;
|
431
431
|
}
|
432
432
|
}
|
433
433
|
|
@@ -584,7 +584,7 @@ bool frt_fdshq_lt(FrtFieldDoc *fd1, FrtFieldDoc *fd2) {
|
|
584
584
|
else { all_equal = false; }
|
585
585
|
break;
|
586
586
|
case FRT_SORT_TYPE_DOC:
|
587
|
-
if (fd1->hit.
|
587
|
+
if (fd1->hit.doc_num < fd2->hit.doc_num) { all_equal = false; c = true; }
|
588
588
|
break;
|
589
589
|
case FRT_SORT_TYPE_INTEGER:
|
590
590
|
if (cmps1[i].val.l < cmps2[i].val.l) { all_equal = false; c = true; }
|
@@ -631,7 +631,7 @@ bool frt_fdshq_lt(FrtFieldDoc *fd1, FrtFieldDoc *fd2) {
|
|
631
631
|
else { all_equal = false; }
|
632
632
|
break;
|
633
633
|
case FRT_SORT_TYPE_DOC:
|
634
|
-
if (fd1->hit.
|
634
|
+
if (fd1->hit.doc_num > fd2->hit.doc_num) { all_equal = false; c = true; }
|
635
635
|
break;
|
636
636
|
case FRT_SORT_TYPE_INTEGER:
|
637
637
|
if (cmps1[i].val.l > cmps2[i].val.l) { all_equal = false; c = true; }
|
@@ -670,9 +670,9 @@ bool frt_fdshq_lt(FrtFieldDoc *fd1, FrtFieldDoc *fd2) {
|
|
670
670
|
}
|
671
671
|
if (all_equal) {
|
672
672
|
if (cmps1[0].reverse) {
|
673
|
-
if (fd1->hit.
|
673
|
+
if (fd1->hit.doc_num > fd2->hit.doc_num) c = true;
|
674
674
|
} else {
|
675
|
-
if (fd1->hit.
|
675
|
+
if (fd1->hit.doc_num > fd2->hit.doc_num) c = true;
|
676
676
|
}
|
677
677
|
}
|
678
678
|
return c;
|
@@ -250,7 +250,7 @@ static const char *curr_err_func = "";
|
|
250
250
|
static void vappend_to_msg_buf(const char *fmt, va_list args) {
|
251
251
|
int v = vsnprintf(msg_bufp, MSG_BUF_HAVE, fmt, args);
|
252
252
|
if (v < 0) {
|
253
|
-
rb_raise(rb_eStandardError, "Error: can't write to test message buffer\n");
|
253
|
+
rb_raise(rb_eStandardError, "Error: can't write to test message buffer, error: %i\n", v);
|
254
254
|
} else {
|
255
255
|
msg_bufp += v;
|
256
256
|
}
|
@@ -16,7 +16,6 @@ const FrtConfig lucene_config = {
|
|
16
16
|
10000, /* maximum field length (number of terms) */
|
17
17
|
};
|
18
18
|
|
19
|
-
|
20
19
|
static FrtFieldInfos *prep_fis(void) {
|
21
20
|
return frt_fis_new(0 | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM);
|
22
21
|
}
|
@@ -72,7 +71,7 @@ static void test_delete_leftover_files(TestCase *tc, void *data) {
|
|
72
71
|
/* Delete one doc so we get a .del file: */
|
73
72
|
ir = frt_ir_open(NULL, store);
|
74
73
|
frt_ir_delete_doc(ir, 7);
|
75
|
-
Aiequal(1, ir->
|
74
|
+
Aiequal(1, ir->max_doc_num(ir) - ir->num_docs(ir));
|
76
75
|
|
77
76
|
/* Set one norm so we get a .s0 file: */
|
78
77
|
frt_ir_set_norm(ir, 21, rb_intern(content_f), 12);
|
@@ -65,22 +65,21 @@ static void check_filtered_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *q
|
|
65
65
|
Aiequal(total_hits, top_docs->size);
|
66
66
|
|
67
67
|
if ((top >= 0) && top_docs->size) {
|
68
|
-
Aiequal(top, top_docs->hits[0]->
|
68
|
+
Aiequal(top, top_docs->hits[0]->doc_num);
|
69
69
|
}
|
70
70
|
|
71
71
|
for (i = 0; i < top_docs->size; i++) {
|
72
72
|
FrtHit *hit = top_docs->hits[i];
|
73
73
|
char buf[1000];
|
74
|
-
sprintf(buf, "doc %d was found unexpectedly", hit->
|
75
|
-
Assert(frt_ary_includes(num_array, total_hits, hit->
|
74
|
+
sprintf(buf, "doc %d was found unexpectedly", hit->doc_num);
|
75
|
+
Assert(frt_ary_includes(num_array, total_hits, hit->doc_num), buf);
|
76
76
|
/* only check the explanation if we got the correct docs. Obviously we
|
77
77
|
* might want to remove this to visually check the explanations */
|
78
78
|
if (total_hits == top_docs->total_hits) {
|
79
|
-
FrtExplanation *e = searcher->explain(searcher, query, hit->
|
79
|
+
FrtExplanation *e = searcher->explain(searcher, query, hit->doc_num);
|
80
80
|
float escore = e->value;
|
81
81
|
if (post_filter) {
|
82
|
-
escore *= post_filter->filter_func(hit->
|
83
|
-
post_filter->arg);
|
82
|
+
escore *= post_filter->filter_func(hit->doc_num, escore, searcher, post_filter->arg);
|
84
83
|
}
|
85
84
|
Afequal(hit->score, escore);
|
86
85
|
frt_expl_destroy(e);
|