ferret 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/Rakefile +1 -0
- data/ext/field.c +87 -87
- data/ext/index.h +253 -255
- data/ext/index_io.c +15 -6
- data/ext/index_rw.c +6 -0
- data/ext/nix_io.c +4 -6
- data/ext/q_boolean.c +0 -6
- data/ext/q_fuzzy.c +10 -7
- data/ext/q_multi_phrase.c +2 -2
- data/ext/q_term.c +2 -2
- data/ext/q_wildcard.c +5 -4
- data/ext/search.c +3 -5
- data/ext/search.h +439 -400
- data/ext/store.h +1 -0
- data/ext/termdocs.c +3 -7
- data/ext/vector.c +1 -1
- data/lib/ferret.rb +1 -1
- data/lib/ferret/store/ram_store.rb +5 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +6 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +1 -1
- metadata +4 -4
data/ext/index_io.c
CHANGED
@@ -252,14 +252,23 @@ is_read_vint(InStream *is)
|
|
252
252
|
}
|
253
253
|
|
254
254
|
inline void
|
255
|
-
|
255
|
+
is_skip_vints(InStream *is, register int cnt)
|
256
256
|
{
|
257
|
-
|
258
|
-
|
259
|
-
|
257
|
+
for (; cnt > 0; cnt--) {
|
258
|
+
while ((is_read_byte(is) & 0x80) != 0) {
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
260
262
|
|
261
|
-
|
262
|
-
|
263
|
+
inline void
|
264
|
+
is_read_chars(InStream *is, char* buffer, int off, int len)
|
265
|
+
{
|
266
|
+
int end, i;
|
267
|
+
|
268
|
+
end = off + len;
|
269
|
+
|
270
|
+
for(i = off; i < end; i++) {
|
271
|
+
buffer[i] = is_read_byte(is);
|
263
272
|
}
|
264
273
|
}
|
265
274
|
|
data/ext/index_rw.c
CHANGED
@@ -186,6 +186,8 @@ void dw_invert_doc(DocumentWriter *self, Document *doc)
|
|
186
186
|
TokenStream *stream;
|
187
187
|
Token *token;
|
188
188
|
FieldInfo *fi;
|
189
|
+
char text_buf[MAX_WORD_SIZE];
|
190
|
+
text_buf[MAX_WORD_SIZE - 1] = '\0';
|
189
191
|
|
190
192
|
DocField **fields = doc->df_arr, *field;
|
191
193
|
for (i = 0; i < dfcnt; i++) {
|
@@ -202,6 +204,10 @@ void dw_invert_doc(DocumentWriter *self, Document *doc)
|
|
202
204
|
if (!field->is_tokenized) { /* un-tokenized field */
|
203
205
|
text = field->data;
|
204
206
|
slen = (int)strlen(text);
|
207
|
+
if (slen >= MAX_WORD_SIZE) {
|
208
|
+
slen = MAX_WORD_SIZE - 1;
|
209
|
+
text = strncpy(text_buf, text, MAX_WORD_SIZE - 1);
|
210
|
+
}
|
205
211
|
if (fi->store_offset) {
|
206
212
|
dw_add_position(self, field_name, text, position,
|
207
213
|
tvoi_create(offset, offset+slen));
|
data/ext/nix_io.c
CHANGED
@@ -40,7 +40,7 @@ int fcount(char *path)
|
|
40
40
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
41
41
|
|
42
42
|
while ((de = readdir(d)) != NULL) {
|
43
|
-
if (
|
43
|
+
if (de->d_name[0] != '.') {
|
44
44
|
cnt++;
|
45
45
|
}
|
46
46
|
}
|
@@ -57,8 +57,7 @@ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
|
57
57
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
58
58
|
|
59
59
|
while ((de = readdir(d)) != NULL) {
|
60
|
-
if (
|
61
|
-
&& !file_is_lock(de->d_name)) {
|
60
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
62
61
|
func(de->d_name, arg);
|
63
62
|
}
|
64
63
|
}
|
@@ -101,8 +100,7 @@ void fs_clear(Store *store)
|
|
101
100
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
101
|
|
103
102
|
while ((de = readdir(d)) != NULL) {
|
104
|
-
if (
|
105
|
-
&& !file_is_lock(de->d_name)) {
|
103
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
106
104
|
char buf[MAX_FILE_PATH];
|
107
105
|
remove(join_path(buf, store->dir.path, de->d_name));
|
108
106
|
}
|
@@ -124,7 +122,7 @@ void fs_clear_all(Store *store)
|
|
124
122
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
125
123
|
|
126
124
|
while ((de = readdir(d)) != NULL) {
|
127
|
-
if (
|
125
|
+
if (de->d_name[0] != '.') {
|
128
126
|
char buf[MAX_FILE_PATH];
|
129
127
|
remove(join_path(buf, store->dir.path, de->d_name));
|
130
128
|
}
|
data/ext/q_boolean.c
CHANGED
@@ -731,12 +731,6 @@ void csc_init(Scorer *self, bool init_scorers)
|
|
731
731
|
csc->first_time = false;
|
732
732
|
}
|
733
733
|
|
734
|
-
void csc_add_scorer(ConjunctionScorer *csc, Scorer *scorer)
|
735
|
-
{
|
736
|
-
RECAPA(csc, ss_cnt, ss_capa, sub_scorers, Scorer *);
|
737
|
-
csc->sub_scorers[csc->ss_cnt++] = scorer;
|
738
|
-
}
|
739
|
-
|
740
734
|
float csc_score(Scorer *self)
|
741
735
|
{
|
742
736
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
data/ext/q_fuzzy.c
CHANGED
@@ -63,14 +63,15 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
63
63
|
/* Let's make sure we have enough room in our array to do the distance
|
64
64
|
* calculations. */
|
65
65
|
if (((m+1) * (n+1)) >= fuzq->da_capa) {
|
66
|
-
fuzq->da_capa = (m * (
|
66
|
+
fuzq->da_capa = ((m+1) * (n+1)) * 2;
|
67
67
|
REALLOC_N(fuzq->da, int, fuzq->da_capa);
|
68
|
+
printf("making capa %d -> %d\n", fuzq->da_capa, (int)fuzq->da);
|
68
69
|
d = fuzq->da;
|
69
70
|
}
|
70
71
|
|
71
72
|
/* init matrix d */
|
72
|
-
for (i = 0; i <= n; i++) d[i +
|
73
|
-
for (j = 0; j <= m; j++) d[0 +
|
73
|
+
for (i = 0; i <= n; i++) d[i + n * 0] = i;
|
74
|
+
for (j = 0; j <= m; j++) d[0 + n * j] = j;
|
74
75
|
|
75
76
|
/* start computing edit distance */
|
76
77
|
for (i = 1; i <= n; i++) {
|
@@ -78,11 +79,11 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
78
79
|
char s_i = text[i - 1];
|
79
80
|
for (j = 1; j <= m; j++) {
|
80
81
|
if (s_i != target[j-1]) {
|
81
|
-
d[i +
|
82
|
+
d[i + n*j] = min3(d[i-1 + n*j], d[i + n*(j-1)], d[i-1 + n*(j-1)])+1;
|
82
83
|
} else {
|
83
|
-
d[i +
|
84
|
+
d[i + n*j] = min3(d[i-1 + n*j]+1, d[i + n*(j-1)]+1, d[i-1 + n*(j-1)]);
|
84
85
|
}
|
85
|
-
best_pos_ed_dist = min2(best_pos_ed_dist, d[i +
|
86
|
+
best_pos_ed_dist = min2(best_pos_ed_dist, d[i + n*j]);
|
86
87
|
}
|
87
88
|
//printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
|
88
89
|
|
@@ -104,7 +105,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
104
105
|
* number of characters in the shorter word. but this was the formula that
|
105
106
|
* was previously used in FuzzyTermEnum, so it has not been changed (even
|
106
107
|
* though min_sim must be greater than 0.0) */
|
107
|
-
return 1.0f - ((float)d[n +
|
108
|
+
return 1.0f - ((float)d[n + n*m] / (float) (fuzq->pre_len + min2(n, m)));
|
108
109
|
}
|
109
110
|
|
110
111
|
/****************************************************************************
|
@@ -252,7 +253,9 @@ void fuzq_destroy(Query *self)
|
|
252
253
|
{
|
253
254
|
FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
|
254
255
|
if (self->destroy_all) term_destroy((Term *)fuzq->term);
|
256
|
+
printf("freeing %d -> %d\n", fuzq->da_capa, (int)fuzq->da);
|
255
257
|
free(fuzq->da);
|
258
|
+
printf("success\n");
|
256
259
|
free(fuzq);
|
257
260
|
q_destroy_i(self);
|
258
261
|
}
|
data/ext/q_multi_phrase.c
CHANGED
@@ -34,7 +34,7 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
|
|
34
34
|
tps[i] = mtdpe_create(ir, mphq->terms[i], mphq->pt_cnt[i]);
|
35
35
|
}
|
36
36
|
if (tps[i] == NULL) {
|
37
|
-
|
37
|
+
/* free everything we just created and return NULL */
|
38
38
|
int j;
|
39
39
|
for (j = 0; j < i; j++) {
|
40
40
|
tps[i]->close(tps[i]);
|
@@ -44,7 +44,7 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
|
|
44
44
|
}
|
45
45
|
}
|
46
46
|
|
47
|
-
if (mphq->slop == 0) {
|
47
|
+
if (mphq->slop == 0) { /* optimize exact case */
|
48
48
|
phsc = exact_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
|
49
49
|
self->similarity,
|
50
50
|
ir->get_norms(ir, mphq->field));
|
data/ext/q_term.c
CHANGED
@@ -212,12 +212,12 @@ bool tsc_next(Scorer *self)
|
|
212
212
|
|
213
213
|
ts->pointer++;
|
214
214
|
if (ts->pointer >= ts->pointer_max) {
|
215
|
-
|
215
|
+
/* refill buffer */
|
216
216
|
ts->pointer_max = ts->tde->read(ts->tde, ts->docs, ts->freqs, TDE_READ_SIZE);
|
217
217
|
if (ts->pointer_max != 0) {
|
218
218
|
ts->pointer = 0;
|
219
219
|
} else {
|
220
|
-
ts->tde->close(ts->tde);
|
220
|
+
ts->tde->close(ts->tde); /* close stream */
|
221
221
|
ts->tde = NULL;
|
222
222
|
return false;
|
223
223
|
}
|
data/ext/q_wildcard.c
CHANGED
@@ -77,16 +77,17 @@ Query *wcq_rewrite(Query *self, IndexReader *ir)
|
|
77
77
|
Term *term = (Term *)self->data;
|
78
78
|
char *text = term->text;
|
79
79
|
char *field = term->field;
|
80
|
-
char *first_star =
|
81
|
-
char *first_ques =
|
82
|
-
|
80
|
+
char *first_star = strchr(text, WILD_STRING);
|
81
|
+
char *first_ques = strchr(text, WILD_CHAR);
|
82
|
+
|
83
|
+
if (first_star == NULL && first_ques == NULL) {
|
83
84
|
q = tq_create(term_clone(term));
|
84
85
|
} else {
|
85
86
|
TermEnum *te;
|
86
87
|
Term prefix_term;
|
87
88
|
char *prefix = NULL;
|
88
89
|
|
89
|
-
char *pattern = (first_ques && first_star > first_ques)
|
90
|
+
char *pattern = (first_ques && (!first_star || (first_star > first_ques)))
|
90
91
|
? first_ques : first_star;
|
91
92
|
|
92
93
|
int prefix_len = (int)(pattern - text);
|
data/ext/search.c
CHANGED
@@ -562,8 +562,7 @@ static void s_search_each_w(Searcher *self, Weight *weight, Filter *filter,
|
|
562
562
|
static void s_search_each(Searcher *self, Query *query, Filter *filter,
|
563
563
|
void (*fn)(Searcher *, int, float, void *), void *arg)
|
564
564
|
{
|
565
|
-
Weight *weight;
|
566
|
-
weight = q_weight(query, self);
|
565
|
+
Weight *weight = q_weight(query, self);
|
567
566
|
s_search_each_w(self, weight, filter, fn, arg);
|
568
567
|
weight->destroy(weight);
|
569
568
|
}
|
@@ -602,8 +601,9 @@ static Similarity *s_get_similarity(Searcher *self)
|
|
602
601
|
|
603
602
|
static void s_close(Searcher *self)
|
604
603
|
{
|
605
|
-
if (self->ir && self->close_ir)
|
604
|
+
if (self->ir && self->close_ir) {
|
606
605
|
ir_close(self->ir);
|
606
|
+
}
|
607
607
|
free(self);
|
608
608
|
}
|
609
609
|
|
@@ -865,7 +865,6 @@ static void msea_search_each(Searcher *self, Query *query, Filter *filter,
|
|
865
865
|
|
866
866
|
struct MultiSearchArg {
|
867
867
|
int total_hits, max_size;
|
868
|
-
float min_score;
|
869
868
|
PriorityQueue *hq;
|
870
869
|
void (*hq_insert)(PriorityQueue *pq, Hit *hit);
|
871
870
|
};
|
@@ -919,7 +918,6 @@ static TopDocs *msea_search(Searcher *self, Query *query, int first_doc,
|
|
919
918
|
ms_arg.hq = hq;
|
920
919
|
ms_arg.total_hits = 0;
|
921
920
|
ms_arg.max_size = max_size;
|
922
|
-
ms_arg.min_score = 0.0;
|
923
921
|
ms_arg.hq_insert = hq_insert;
|
924
922
|
|
925
923
|
msea_search_each_w(self, weight, filter, msea_search_i, &ms_arg);
|
data/ext/search.h
CHANGED
@@ -10,7 +10,7 @@ typedef struct Scorer Scorer;
|
|
10
10
|
#include "similarity.h"
|
11
11
|
|
12
12
|
#define term_set_create() \
|
13
|
-
|
13
|
+
hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
|
14
14
|
|
15
15
|
/***************************************************************************
|
16
16
|
*
|
@@ -19,12 +19,13 @@ typedef struct Scorer Scorer;
|
|
19
19
|
***************************************************************************/
|
20
20
|
|
21
21
|
#define EXPLANATION_DETAILS_START_SIZE 4
|
22
|
-
typedef struct Explanation
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
typedef struct Explanation
|
23
|
+
{
|
24
|
+
float value;
|
25
|
+
char *description;
|
26
|
+
struct Explanation **details;
|
27
|
+
int dcnt;
|
28
|
+
int dcapa;
|
28
29
|
} Explanation;
|
29
30
|
|
30
31
|
extern Explanation *expl_create(float value, char *description);
|
@@ -39,23 +40,25 @@ extern char *expl_to_html(Explanation *self);
|
|
39
40
|
*
|
40
41
|
***************************************************************************/
|
41
42
|
|
42
|
-
typedef struct Hit
|
43
|
-
|
44
|
-
|
43
|
+
typedef struct Hit
|
44
|
+
{
|
45
|
+
int doc;
|
46
|
+
float score;
|
45
47
|
} Hit;
|
46
48
|
|
47
49
|
extern bool hit_less_than(void *p1, void *p2);
|
48
|
-
|
50
|
+
|
49
51
|
/***************************************************************************
|
50
52
|
*
|
51
53
|
* TopDocs
|
52
54
|
*
|
53
55
|
***************************************************************************/
|
54
56
|
|
55
|
-
typedef struct TopDocs
|
56
|
-
|
57
|
-
|
58
|
-
|
57
|
+
typedef struct TopDocs
|
58
|
+
{
|
59
|
+
int total_hits;
|
60
|
+
int size;
|
61
|
+
Hit **hits;
|
59
62
|
} TopDocs;
|
60
63
|
|
61
64
|
extern TopDocs *td_create(int total_hits, int size, Hit **hits);
|
@@ -68,15 +71,16 @@ extern char *td_to_s(TopDocs *td);
|
|
68
71
|
*
|
69
72
|
***************************************************************************/
|
70
73
|
|
71
|
-
typedef struct Filter
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
74
|
+
typedef struct Filter
|
75
|
+
{
|
76
|
+
void *data;
|
77
|
+
char *name;
|
78
|
+
HshTable *cache;
|
79
|
+
BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
|
80
|
+
char *(*to_s)(struct Filter *self);
|
81
|
+
uint (*hash)(struct Filter *self);
|
82
|
+
int (*eq)(struct Filter *self, struct Filter *o);
|
83
|
+
void (*destroy)(struct Filter *self);
|
80
84
|
} Filter;
|
81
85
|
|
82
86
|
extern Filter *filt_create(char *name);
|
@@ -93,7 +97,7 @@ extern int filt_eq(Filter *self, Filter *o);
|
|
93
97
|
***************************************************************************/
|
94
98
|
|
95
99
|
extern Filter *rfilt_create(const char *field, char *lower_term,
|
96
|
-
|
100
|
+
char *upper_term, bool include_lower, bool include_upper);
|
97
101
|
|
98
102
|
/***************************************************************************
|
99
103
|
*
|
@@ -101,8 +105,9 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
|
|
101
105
|
*
|
102
106
|
***************************************************************************/
|
103
107
|
|
104
|
-
typedef struct QueryFilter
|
105
|
-
|
108
|
+
typedef struct QueryFilter
|
109
|
+
{
|
110
|
+
Query *query;
|
106
111
|
} QueryFilter;
|
107
112
|
|
108
113
|
extern Filter *qfilt_create(Query *query);
|
@@ -113,22 +118,23 @@ extern Filter *qfilt_create(Query *query);
|
|
113
118
|
*
|
114
119
|
***************************************************************************/
|
115
120
|
|
116
|
-
struct Weight
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
121
|
+
struct Weight
|
122
|
+
{
|
123
|
+
void *data;
|
124
|
+
float value;
|
125
|
+
float qweight;
|
126
|
+
float qnorm;
|
127
|
+
float idf;
|
128
|
+
Query *query;
|
129
|
+
Similarity *similarity;
|
130
|
+
Query *(*get_query)(Weight *self);
|
131
|
+
float (*get_value)(Weight *self);
|
132
|
+
void (*normalize)(Weight *self, float normalization_factor);
|
133
|
+
Scorer *(*scorer)(Weight *self, IndexReader *ir);
|
134
|
+
Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
|
135
|
+
float (*sum_of_squared_weights)(Weight *self);
|
136
|
+
char *(*to_s)(Weight *self);
|
137
|
+
void (*destroy)(Weight *self);
|
132
138
|
};
|
133
139
|
|
134
140
|
extern Weight *w_create(Query *query);
|
@@ -154,8 +160,8 @@ extern Weight *tw_create(Query *query, Searcher *searcher);
|
|
154
160
|
***************************************************************************/
|
155
161
|
|
156
162
|
typedef struct BooleanWeight {
|
157
|
-
|
158
|
-
|
163
|
+
Weight **weights;
|
164
|
+
int w_cnt;
|
159
165
|
} BooleanWeight;
|
160
166
|
|
161
167
|
extern Weight *bw_create(Query *query, Searcher *searcher);
|
@@ -199,39 +205,40 @@ extern Weight *spanw_create(Query *query, Searcher *searcher);
|
|
199
205
|
***************************************************************************/
|
200
206
|
|
201
207
|
enum QUERY_TYPE {
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
208
|
+
TERM_QUERY,
|
209
|
+
BOOLEAN_QUERY,
|
210
|
+
PHRASE_QUERY,
|
211
|
+
MULTI_PHRASE_QUERY,
|
212
|
+
CONSTANT_QUERY,
|
213
|
+
FILTERED_QUERY,
|
214
|
+
MATCH_ALL_QUERY,
|
215
|
+
RANGE_QUERY,
|
216
|
+
WILD_CARD_QUERY,
|
217
|
+
FUZZY_QUERY,
|
218
|
+
PREFIX_QUERY,
|
219
|
+
SPAN_TERM_QUERY,
|
220
|
+
SPAN_FIRST_QUERY,
|
221
|
+
SPAN_OR_QUERY,
|
222
|
+
SPAN_NOT_QUERY,
|
223
|
+
SPAN_NEAR_QUERY
|
218
224
|
};
|
219
225
|
|
220
|
-
struct Query
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
226
|
+
struct Query
|
227
|
+
{
|
228
|
+
uchar type;
|
229
|
+
int ref_cnt;
|
230
|
+
void *data;
|
231
|
+
float boost;
|
232
|
+
Weight *weight;
|
233
|
+
Query *(*rewrite)(Query *self, IndexReader *ir);
|
234
|
+
void (*extract_terms)(Query *self, HashSet *terms);
|
235
|
+
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
236
|
+
char *(*to_s)(Query *self, char *field);
|
237
|
+
uint (*hash)(Query *self);
|
238
|
+
int (*eq)(Query *self, Query *o);
|
239
|
+
void (*destroy_i)(Query *self);
|
240
|
+
Weight *(*create_weight_i)(Query *self, Searcher *searcher);
|
241
|
+
bool destroy_all : 1;
|
235
242
|
};
|
236
243
|
|
237
244
|
/* Internal Query Functions */
|
@@ -253,8 +260,9 @@ extern int q_eq(Query *self, Query *o);
|
|
253
260
|
*
|
254
261
|
***************************************************************************/
|
255
262
|
|
256
|
-
typedef struct TermQuery
|
257
|
-
|
263
|
+
typedef struct TermQuery
|
264
|
+
{
|
265
|
+
Term *term;
|
258
266
|
} TermQuery;
|
259
267
|
|
260
268
|
extern Query *tq_create(Term *term);
|
@@ -269,19 +277,20 @@ extern Query *tq_create(Term *term);
|
|
269
277
|
* BooleanClause
|
270
278
|
***************************************************************************/
|
271
279
|
|
272
|
-
enum BC_TYPE
|
273
|
-
|
274
|
-
|
275
|
-
|
280
|
+
enum BC_TYPE
|
281
|
+
{
|
282
|
+
BC_SHOULD,
|
283
|
+
BC_MUST,
|
284
|
+
BC_MUST_NOT
|
276
285
|
};
|
277
286
|
|
278
287
|
typedef struct BooleanClause {
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
288
|
+
int ref_cnt;
|
289
|
+
Query *query;
|
290
|
+
Query *rewritten;
|
291
|
+
unsigned int occur : 4;
|
292
|
+
bool is_prohibited : 1;
|
293
|
+
bool is_required : 1;
|
285
294
|
} BooleanClause;
|
286
295
|
|
287
296
|
extern BooleanClause *bc_create(Query *query, unsigned int occur);
|
@@ -296,19 +305,20 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
|
|
296
305
|
#define BOOLEAN_CLAUSES_START_CAPA 4
|
297
306
|
#define QUERY_STRING_START_SIZE 64
|
298
307
|
|
299
|
-
typedef struct BooleanQuery
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
308
|
+
typedef struct BooleanQuery
|
309
|
+
{
|
310
|
+
bool coord_disabled;
|
311
|
+
int max_clause_cnt;
|
312
|
+
int clause_cnt;
|
313
|
+
int clause_capa;
|
314
|
+
float original_boost;
|
315
|
+
BooleanClause **clauses;
|
316
|
+
Similarity *similarity;
|
307
317
|
} BooleanQuery;
|
308
318
|
|
309
319
|
extern Query *bq_create(bool coord_disabled);
|
310
320
|
extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
311
|
-
|
321
|
+
unsigned int occur);
|
312
322
|
extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
313
323
|
|
314
324
|
/***************************************************************************
|
@@ -318,13 +328,14 @@ extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
|
318
328
|
***************************************************************************/
|
319
329
|
|
320
330
|
#define PHQ_INIT_CAPA 4
|
321
|
-
typedef struct PhraseQuery
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
331
|
+
typedef struct PhraseQuery
|
332
|
+
{
|
333
|
+
int slop;
|
334
|
+
Term **terms;
|
335
|
+
int *positions;
|
336
|
+
int t_cnt;
|
337
|
+
int t_capa;
|
338
|
+
char *field;
|
328
339
|
} PhraseQuery;
|
329
340
|
|
330
341
|
extern Query *phq_create();
|
@@ -336,14 +347,15 @@ extern void phq_add_term(Query *self, Term *term, int pos_inc);
|
|
336
347
|
*
|
337
348
|
***************************************************************************/
|
338
349
|
|
339
|
-
typedef struct MultiPhraseQuery
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
350
|
+
typedef struct MultiPhraseQuery
|
351
|
+
{
|
352
|
+
int slop;
|
353
|
+
Term ***terms;
|
354
|
+
int *positions;
|
355
|
+
int *pt_cnt;
|
356
|
+
int t_cnt;
|
357
|
+
int t_capa;
|
358
|
+
char *field;
|
347
359
|
} MultiPhraseQuery;
|
348
360
|
|
349
361
|
extern Query *mphq_create();
|
@@ -379,16 +391,17 @@ extern bool wc_match(char *pattern, char *text);
|
|
379
391
|
#define DEF_PRE_LEN 0
|
380
392
|
#define TYPICAL_LONGEST_WORD 20
|
381
393
|
|
382
|
-
typedef struct FuzzyQuery
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
394
|
+
typedef struct FuzzyQuery
|
395
|
+
{
|
396
|
+
Term *term;
|
397
|
+
char *text; /* term text after prefix */
|
398
|
+
int text_len;
|
399
|
+
int pre_len;
|
400
|
+
float min_sim;
|
401
|
+
float scale_factor;
|
402
|
+
int max_distances[TYPICAL_LONGEST_WORD];
|
403
|
+
int *da;
|
404
|
+
int da_capa;
|
392
405
|
} FuzzyQuery;
|
393
406
|
|
394
407
|
extern Query *fuzq_create(Term *term);
|
@@ -408,6 +421,12 @@ extern Query *csq_create(Filter *filter);
|
|
408
421
|
*
|
409
422
|
***************************************************************************/
|
410
423
|
|
424
|
+
typedef struct FilteredQuery
|
425
|
+
{
|
426
|
+
Query *query;
|
427
|
+
Filter *filter;
|
428
|
+
} FilteredQuery;
|
429
|
+
|
411
430
|
extern Query *fq_create(Query *query, Filter *filter);
|
412
431
|
|
413
432
|
/***************************************************************************
|
@@ -424,31 +443,21 @@ extern Query *maq_create();
|
|
424
443
|
*
|
425
444
|
***************************************************************************/
|
426
445
|
|
427
|
-
typedef struct Range
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
446
|
+
typedef struct Range
|
447
|
+
{
|
448
|
+
char *field;
|
449
|
+
char *lower_term;
|
450
|
+
char *upper_term;
|
451
|
+
bool include_lower : 1;
|
452
|
+
bool include_upper : 1;
|
433
453
|
} Range;
|
434
454
|
|
435
455
|
extern Query *rq_create(const char *field, char *lower_term,
|
436
|
-
|
456
|
+
char *upper_term, bool include_lower, bool include_upper);
|
437
457
|
extern Query *rq_create_less(const char *field, char *upper_term,
|
438
|
-
|
458
|
+
bool include_upper);
|
439
459
|
extern Query *rq_create_more(const char *field, char *lower_term,
|
440
|
-
|
441
|
-
|
442
|
-
/***************************************************************************
|
443
|
-
*
|
444
|
-
* FilteredQuery
|
445
|
-
*
|
446
|
-
***************************************************************************/
|
447
|
-
|
448
|
-
typedef struct FilteredQuery {
|
449
|
-
Query *query;
|
450
|
-
Filter *filter;
|
451
|
-
} FilteredQuery;
|
460
|
+
bool include_lower);
|
452
461
|
|
453
462
|
/***************************************************************************
|
454
463
|
*
|
@@ -461,16 +470,17 @@ typedef struct FilteredQuery {
|
|
461
470
|
***************************************************************************/
|
462
471
|
|
463
472
|
typedef struct SpanEnum SpanEnum;
|
464
|
-
struct SpanEnum
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
473
|
+
struct SpanEnum
|
474
|
+
{
|
475
|
+
void *data;
|
476
|
+
Query *query;
|
477
|
+
bool (*next)(SpanEnum *self);
|
478
|
+
bool (*skip_to)(SpanEnum *self, int target_doc);
|
479
|
+
int (*doc)(SpanEnum *self);
|
480
|
+
int (*start)(SpanEnum *self);
|
481
|
+
int (*end)(SpanEnum *self);
|
482
|
+
char *(*to_s)(SpanEnum *self);
|
483
|
+
void (*destroy)(SpanEnum *self);
|
474
484
|
};
|
475
485
|
|
476
486
|
/***************************************************************************
|
@@ -478,12 +488,13 @@ struct SpanEnum {
|
|
478
488
|
***************************************************************************/
|
479
489
|
|
480
490
|
typedef struct SpanTermEnum SpanTermEnum;
|
481
|
-
struct SpanTermEnum
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
491
|
+
struct SpanTermEnum
|
492
|
+
{
|
493
|
+
TermDocEnum *positions;
|
494
|
+
int position;
|
495
|
+
int doc;
|
496
|
+
int count;
|
497
|
+
int freq;
|
487
498
|
};
|
488
499
|
|
489
500
|
extern SpanEnum *spante_create(Query *query, IndexReader *ir);
|
@@ -498,11 +509,12 @@ extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
|
498
509
|
* SpanOrEnum
|
499
510
|
***************************************************************************/
|
500
511
|
|
501
|
-
typedef struct SpanOrEnum
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
512
|
+
typedef struct SpanOrEnum
|
513
|
+
{
|
514
|
+
PriorityQueue *queue;
|
515
|
+
SpanEnum **span_enums;
|
516
|
+
int s_cnt;
|
517
|
+
bool first_time;
|
506
518
|
} SpanOrEnum;
|
507
519
|
|
508
520
|
extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
@@ -511,11 +523,12 @@ extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
|
511
523
|
* SpanEnumCell
|
512
524
|
***************************************************************************/
|
513
525
|
|
514
|
-
typedef struct SpanEnumCell
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
526
|
+
typedef struct SpanEnumCell
|
527
|
+
{
|
528
|
+
SpanEnum *parent;
|
529
|
+
SpanEnum *se;
|
530
|
+
int index;
|
531
|
+
int length;
|
519
532
|
} SpanEnumCell;
|
520
533
|
|
521
534
|
extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
@@ -524,16 +537,17 @@ extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
|
524
537
|
* SpanNearEnum
|
525
538
|
***************************************************************************/
|
526
539
|
|
527
|
-
typedef struct SpanNearEnum
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
540
|
+
typedef struct SpanNearEnum
|
541
|
+
{
|
542
|
+
SpanEnum **span_enums;
|
543
|
+
int s_cnt;
|
544
|
+
int slop;
|
545
|
+
int current;
|
546
|
+
bool first_time : 1;
|
547
|
+
bool in_order : 1;
|
548
|
+
int doc;
|
549
|
+
int start;
|
550
|
+
int end;
|
537
551
|
} SpanNearEnum;
|
538
552
|
|
539
553
|
extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
@@ -542,11 +556,12 @@ extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
|
542
556
|
* SpanNotEnum
|
543
557
|
***************************************************************************/
|
544
558
|
|
545
|
-
typedef struct SpanNotEnum
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
559
|
+
typedef struct SpanNotEnum
|
560
|
+
{
|
561
|
+
SpanEnum *inc;
|
562
|
+
SpanEnum *exc;
|
563
|
+
bool more_inc : 1;
|
564
|
+
bool more_exc : 1;
|
550
565
|
} SpanNotEnum;
|
551
566
|
|
552
567
|
extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
@@ -556,11 +571,12 @@ extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
|
556
571
|
***************************************************************************/
|
557
572
|
|
558
573
|
typedef struct SpanQuery SpanQuery;
|
559
|
-
struct SpanQuery
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
574
|
+
struct SpanQuery
|
575
|
+
{
|
576
|
+
void *data;
|
577
|
+
char *field;
|
578
|
+
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
579
|
+
HashSet *(*get_terms)(Query *self);
|
564
580
|
};
|
565
581
|
|
566
582
|
/***************************************************************************
|
@@ -573,9 +589,10 @@ extern Query *spantq_create(Term *term);
|
|
573
589
|
* SpanFirstQuery
|
574
590
|
***************************************************************************/
|
575
591
|
|
576
|
-
typedef struct SpanFirstQuery
|
577
|
-
|
578
|
-
|
592
|
+
typedef struct SpanFirstQuery
|
593
|
+
{
|
594
|
+
int end;
|
595
|
+
Query *match;
|
579
596
|
} SpanFirstQuery;
|
580
597
|
|
581
598
|
extern Query *spanfq_create(Query *match, int end);
|
@@ -584,9 +601,10 @@ extern Query *spanfq_create(Query *match, int end);
|
|
584
601
|
* SpanOrQuery
|
585
602
|
***************************************************************************/
|
586
603
|
|
587
|
-
typedef struct SpanOrQuery
|
588
|
-
|
589
|
-
|
604
|
+
typedef struct SpanOrQuery
|
605
|
+
{
|
606
|
+
Query **clauses;
|
607
|
+
int c_cnt;
|
590
608
|
} SpanOrQuery;
|
591
609
|
|
592
610
|
extern Query *spanoq_create(Query **clauses, int c_cnt);
|
@@ -595,24 +613,26 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
|
|
595
613
|
* SpanNearQuery
|
596
614
|
***************************************************************************/
|
597
615
|
|
598
|
-
typedef struct SpanNearQuery
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
616
|
+
typedef struct SpanNearQuery
|
617
|
+
{
|
618
|
+
Query **clauses;
|
619
|
+
int c_cnt;
|
620
|
+
int slop;
|
621
|
+
bool in_order;
|
603
622
|
} SpanNearQuery;
|
604
623
|
|
605
624
|
extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
606
|
-
|
625
|
+
bool in_order);
|
607
626
|
|
608
627
|
|
609
628
|
/***************************************************************************
|
610
629
|
* SpanNotQuery
|
611
630
|
***************************************************************************/
|
612
631
|
|
613
|
-
typedef struct SpanNotQuery
|
614
|
-
|
615
|
-
|
632
|
+
typedef struct SpanNotQuery
|
633
|
+
{
|
634
|
+
Query *inc;
|
635
|
+
Query *exc;
|
616
636
|
} SpanNotQuery;
|
617
637
|
|
618
638
|
extern Query *spanxq_create(Query *inc, Query *exc);
|
@@ -625,15 +645,16 @@ extern Query *spanxq_create(Query *inc, Query *exc);
|
|
625
645
|
|
626
646
|
#define SCORER_NULLIFY(mscorer) mscorer->destroy(mscorer); mscorer = NULL
|
627
647
|
|
628
|
-
struct Scorer
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
648
|
+
struct Scorer
|
649
|
+
{
|
650
|
+
void *data;
|
651
|
+
Similarity *similarity;
|
652
|
+
int doc;
|
653
|
+
float (*score)(Scorer *self);
|
654
|
+
bool (*next)(Scorer *self);
|
655
|
+
bool (*skip_to)(Scorer *self, int doc_num);
|
656
|
+
Explanation *(*explain)(Scorer *self, int doc_num);
|
657
|
+
void (*destroy)(Scorer *self);
|
637
658
|
};
|
638
659
|
|
639
660
|
/* Internal Scorer Function */
|
@@ -653,16 +674,17 @@ extern int scorer_doc_cmp(const void *p1, const void *p2);
|
|
653
674
|
#define SCORE_CACHE_SIZE 32
|
654
675
|
#define TDE_READ_SIZE 32
|
655
676
|
|
656
|
-
typedef struct TermScorer
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
677
|
+
typedef struct TermScorer
|
678
|
+
{
|
679
|
+
int docs[TDE_READ_SIZE];
|
680
|
+
int freqs[TDE_READ_SIZE];
|
681
|
+
int pointer;
|
682
|
+
int pointer_max;
|
683
|
+
float score_cache[SCORE_CACHE_SIZE];
|
684
|
+
Weight *weight;
|
685
|
+
TermDocEnum *tde;
|
686
|
+
uchar *norms;
|
687
|
+
float weight_value;
|
666
688
|
} TermScorer;
|
667
689
|
|
668
690
|
extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
@@ -677,89 +699,96 @@ extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
|
677
699
|
* Coordinator
|
678
700
|
***************************************************************************/
|
679
701
|
|
680
|
-
typedef struct Coordinator
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
702
|
+
typedef struct Coordinator
|
703
|
+
{
|
704
|
+
int max_coord;
|
705
|
+
float *coord_factors;
|
706
|
+
Similarity *similarity;
|
707
|
+
int num_matches;
|
685
708
|
} Coordinator;
|
686
709
|
|
687
710
|
/***************************************************************************
|
688
711
|
* DisjunctionSumScorer
|
689
712
|
***************************************************************************/
|
690
713
|
|
691
|
-
typedef struct DisjunctionSumScorer
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
714
|
+
typedef struct DisjunctionSumScorer
|
715
|
+
{
|
716
|
+
float cum_score;
|
717
|
+
int num_matches;
|
718
|
+
int min_num_matches;
|
719
|
+
Scorer **sub_scorers;
|
720
|
+
int ss_cnt;
|
721
|
+
PriorityQueue *scorer_queue;
|
722
|
+
Coordinator *coordinator;
|
699
723
|
} DisjunctionSumScorer;
|
700
724
|
|
701
725
|
/***************************************************************************
|
702
726
|
* ConjunctionScorer
|
703
727
|
***************************************************************************/
|
704
728
|
|
705
|
-
typedef struct ConjunctionScorer
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
729
|
+
typedef struct ConjunctionScorer
|
730
|
+
{
|
731
|
+
bool first_time : 1;
|
732
|
+
bool more : 1;
|
733
|
+
float coord;
|
734
|
+
int ss_cnt;
|
735
|
+
int ss_capa;
|
736
|
+
Scorer **sub_scorers;
|
737
|
+
int first;
|
738
|
+
int last;
|
739
|
+
Coordinator *coordinator;
|
740
|
+
int last_scored_doc;
|
716
741
|
} ConjunctionScorer;
|
717
742
|
|
718
743
|
/***************************************************************************
|
719
744
|
* SingleMatchScorer
|
720
745
|
***************************************************************************/
|
721
746
|
|
722
|
-
typedef struct SingleMatchScorer
|
723
|
-
|
724
|
-
|
747
|
+
typedef struct SingleMatchScorer
|
748
|
+
{
|
749
|
+
Coordinator *coordinator;
|
750
|
+
Scorer *scorer;
|
725
751
|
} SingleMatchScorer;
|
726
752
|
|
727
753
|
/***************************************************************************
|
728
754
|
* ReqOptSumScorer
|
729
755
|
***************************************************************************/
|
730
756
|
|
731
|
-
typedef struct ReqOptSumScorer
|
732
|
-
|
733
|
-
|
734
|
-
|
757
|
+
typedef struct ReqOptSumScorer
|
758
|
+
{
|
759
|
+
Scorer *req_scorer;
|
760
|
+
Scorer *opt_scorer;
|
761
|
+
bool first_time_opt;
|
735
762
|
} ReqOptSumScorer;
|
736
763
|
|
737
764
|
/***************************************************************************
|
738
765
|
* ReqExclScorer
|
739
766
|
***************************************************************************/
|
740
767
|
|
741
|
-
typedef struct ReqExclScorer
|
742
|
-
|
743
|
-
|
744
|
-
|
768
|
+
typedef struct ReqExclScorer
|
769
|
+
{
|
770
|
+
Scorer *req_scorer;
|
771
|
+
Scorer *excl_scorer;
|
772
|
+
bool first_time;
|
745
773
|
} ReqExclScorer;
|
746
774
|
|
747
775
|
/***************************************************************************
|
748
776
|
* BooleanScorer
|
749
777
|
***************************************************************************/
|
750
778
|
|
751
|
-
typedef struct BooleanScorer
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
779
|
+
typedef struct BooleanScorer
|
780
|
+
{
|
781
|
+
Scorer **required_scorers;
|
782
|
+
int rs_cnt;
|
783
|
+
int rs_capa;
|
784
|
+
Scorer **optional_scorers;
|
785
|
+
int os_cnt;
|
786
|
+
int os_capa;
|
787
|
+
Scorer **prohibited_scorers;
|
788
|
+
int ps_cnt;
|
789
|
+
int ps_capa;
|
790
|
+
Scorer *counting_sum_scorer;
|
791
|
+
Coordinator *coordinator;
|
763
792
|
} BooleanScorer;
|
764
793
|
|
765
794
|
extern Scorer *bsc_create(Similarity *similarity);
|
@@ -774,12 +803,13 @@ extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
|
774
803
|
/***************************************************************************
|
775
804
|
* PhrasePosition
|
776
805
|
***************************************************************************/
|
777
|
-
typedef struct PhrasePosition
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
806
|
+
typedef struct PhrasePosition
|
807
|
+
{
|
808
|
+
TermDocEnum *tpe;
|
809
|
+
int offset;
|
810
|
+
int count;
|
811
|
+
int doc;
|
812
|
+
int position;
|
783
813
|
} PhrasePosition;
|
784
814
|
|
785
815
|
extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
@@ -788,39 +818,40 @@ extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
|
788
818
|
* PhraseScorer
|
789
819
|
***************************************************************************/
|
790
820
|
|
791
|
-
typedef struct PhraseScorer
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
821
|
+
typedef struct PhraseScorer
|
822
|
+
{
|
823
|
+
float freq;
|
824
|
+
uchar *norms;
|
825
|
+
float value;
|
826
|
+
Weight *weight;
|
827
|
+
bool first_time : 1;
|
828
|
+
bool more : 1;
|
829
|
+
int pp_first;
|
830
|
+
int pp_last;
|
831
|
+
int pp_cnt;
|
832
|
+
PhrasePosition **phrase_pos;
|
833
|
+
float (*phrase_freq)(Scorer *self);
|
834
|
+
int slop;
|
804
835
|
} PhraseScorer;
|
805
836
|
|
806
837
|
extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
807
|
-
|
838
|
+
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
808
839
|
|
809
840
|
/***************************************************************************
|
810
841
|
* ExactPhraseScorer
|
811
842
|
***************************************************************************/
|
812
843
|
|
813
844
|
extern Scorer *exact_phrase_scorer_create(Weight *weight,
|
814
|
-
|
815
|
-
|
845
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
846
|
+
Similarity *similarity, uchar *norms);
|
816
847
|
|
817
848
|
/***************************************************************************
|
818
849
|
* SloppyPhraseScorer
|
819
850
|
***************************************************************************/
|
820
851
|
|
821
852
|
extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
822
|
-
|
823
|
-
|
853
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
854
|
+
Similarity *similarity, int slop, uchar *norms);
|
824
855
|
|
825
856
|
/***************************************************************************
|
826
857
|
*
|
@@ -828,9 +859,10 @@ extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
|
828
859
|
*
|
829
860
|
***************************************************************************/
|
830
861
|
|
831
|
-
typedef struct ConstantScoreScorer
|
832
|
-
|
833
|
-
|
862
|
+
typedef struct ConstantScoreScorer
|
863
|
+
{
|
864
|
+
BitVector *bv;
|
865
|
+
float score;
|
834
866
|
} ConstantScoreScorer;
|
835
867
|
|
836
868
|
extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
@@ -842,10 +874,11 @@ extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
|
842
874
|
*
|
843
875
|
***************************************************************************/
|
844
876
|
|
845
|
-
typedef struct MatchAllScorer
|
846
|
-
|
847
|
-
|
848
|
-
|
877
|
+
typedef struct MatchAllScorer
|
878
|
+
{
|
879
|
+
IndexReader *ir;
|
880
|
+
int max_doc;
|
881
|
+
float score;
|
849
882
|
} MatchAllScorer;
|
850
883
|
|
851
884
|
extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
@@ -857,16 +890,17 @@ extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
|
857
890
|
*
|
858
891
|
***************************************************************************/
|
859
892
|
|
860
|
-
typedef struct SpanScorer
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
893
|
+
typedef struct SpanScorer
|
894
|
+
{
|
895
|
+
bool first_time : 1;
|
896
|
+
bool more : 1;
|
897
|
+
IndexReader *ir;
|
898
|
+
SpanEnum *spans;
|
899
|
+
Similarity *sim;
|
900
|
+
uchar *norms;
|
901
|
+
Weight *weight;
|
902
|
+
float value;
|
903
|
+
float freq;
|
870
904
|
} SpanScorer;
|
871
905
|
|
872
906
|
extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
@@ -878,28 +912,29 @@ extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
|
878
912
|
***************************************************************************/
|
879
913
|
|
880
914
|
enum SORT_TYPE {
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
915
|
+
SORT_TYPE_SCORE,
|
916
|
+
SORT_TYPE_DOC,
|
917
|
+
SORT_TYPE_INTEGER,
|
918
|
+
SORT_TYPE_FLOAT,
|
919
|
+
SORT_TYPE_STRING,
|
920
|
+
SORT_TYPE_AUTO
|
887
921
|
};
|
888
922
|
|
889
923
|
/***************************************************************************
|
890
924
|
* SortField
|
891
925
|
***************************************************************************/
|
892
926
|
|
893
|
-
typedef struct SortField
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
927
|
+
typedef struct SortField
|
928
|
+
{
|
929
|
+
mutex_t mutex;
|
930
|
+
char *field;
|
931
|
+
int type;
|
932
|
+
bool reverse : 1;
|
933
|
+
void *index;
|
934
|
+
int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
|
935
|
+
void *(*create_index)(int size);
|
936
|
+
void (*destroy_index)(void *p);
|
937
|
+
void (*handle_term)(void *index, TermDocEnum *tde, char *text);
|
903
938
|
} SortField;
|
904
939
|
|
905
940
|
extern SortField *sort_field_create(char *field, int type, bool reverse);
|
@@ -921,11 +956,12 @@ extern SortField SORT_FIELD_DOC_REV;
|
|
921
956
|
* Sort
|
922
957
|
***************************************************************************/
|
923
958
|
|
924
|
-
typedef struct Sort
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
959
|
+
typedef struct Sort
|
960
|
+
{
|
961
|
+
SortField **sort_fields;
|
962
|
+
int sf_cnt;
|
963
|
+
int sf_capa;
|
964
|
+
bool destroy_all : 1;
|
929
965
|
} Sort;
|
930
966
|
|
931
967
|
extern Sort *sort_create();
|
@@ -951,27 +987,27 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
951
987
|
***************************************************************************/
|
952
988
|
|
953
989
|
struct Searcher {
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
990
|
+
void *data;
|
991
|
+
IndexReader *ir;
|
992
|
+
Similarity *similarity;
|
993
|
+
bool close_ir : 1;
|
994
|
+
int (*doc_freq)(Searcher *self, Term *term);
|
995
|
+
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
|
996
|
+
Document *(*get_doc)(Searcher *self, int doc_num);
|
997
|
+
int (*max_doc)(Searcher *self);
|
998
|
+
Weight *(*create_weight)(Searcher *self, Query *query);
|
999
|
+
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
1000
|
+
int num_docs, Filter *filter, Sort *sort);
|
1001
|
+
void (*search_each)(Searcher *self, Query *query, Filter *filter,
|
1002
|
+
void (*fn)(Searcher *, int, float, void *), void *arg);
|
1003
|
+
void (*search_each_w)(Searcher *self, Weight *weight,
|
1004
|
+
Filter *filter, void (*fn)(Searcher *, int, float, void *),
|
1005
|
+
void *arg);
|
1006
|
+
Query *(*rewrite)(Searcher *self, Query *original);
|
1007
|
+
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
1008
|
+
Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
|
1009
|
+
Similarity *(*get_similarity)(Searcher *self);
|
1010
|
+
void (*close)(Searcher *self);
|
975
1011
|
};
|
976
1012
|
|
977
1013
|
#define sea_doc_freq(s, t) s->doc_freq(s, t)
|
@@ -979,11 +1015,11 @@ struct Searcher {
|
|
979
1015
|
#define sea_get_doc(s, dn) s->get_doc(s, dn)
|
980
1016
|
#define sea_max_doc(s) s->max_doc(s)
|
981
1017
|
#define sea_search(s, q, fd, nd, filt, sort)\
|
982
|
-
|
1018
|
+
s->search(s, q, fd, nd, filt, sort)
|
983
1019
|
#define sea_search_each(s, q, filt, fn, arg)\
|
984
|
-
|
1020
|
+
s->search_each(s, q, filt, fn, arg)
|
985
1021
|
#define sea_search_each_w(s, q, filt, fn, arg)\
|
986
|
-
|
1022
|
+
s->search_each_w(s, q, filt, fn, arg)
|
987
1023
|
#define sea_rewrite(s, q) s->rewrite(s, q)
|
988
1024
|
#define sea_explain(s, q, dn) s->explain(s, q, dn)
|
989
1025
|
#define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
|
@@ -998,16 +1034,17 @@ extern Searcher *sea_create(IndexReader *ir);
|
|
998
1034
|
*
|
999
1035
|
***************************************************************************/
|
1000
1036
|
|
1001
|
-
typedef struct MultiSearcher
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1037
|
+
typedef struct MultiSearcher
|
1038
|
+
{
|
1039
|
+
int s_cnt;
|
1040
|
+
Searcher **searchers;
|
1041
|
+
int *starts;
|
1042
|
+
int max_doc;
|
1043
|
+
bool close_subs : 1;
|
1007
1044
|
} MultiSearcher;
|
1008
1045
|
|
1009
1046
|
extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
1010
|
-
|
1047
|
+
bool close_subs);
|
1011
1048
|
|
1012
1049
|
/***************************************************************************
|
1013
1050
|
*
|
@@ -1017,29 +1054,30 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
|
1017
1054
|
|
1018
1055
|
#define CONC_WORDS 2
|
1019
1056
|
|
1020
|
-
typedef struct QParser
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1057
|
+
typedef struct QParser
|
1058
|
+
{
|
1059
|
+
mutex_t mutex;
|
1060
|
+
bool or_default : 1;
|
1061
|
+
bool wild_lower : 1;
|
1062
|
+
bool clean_str : 1;
|
1063
|
+
bool handle_parse_errors : 1;
|
1064
|
+
bool allow_any_fields : 1;
|
1065
|
+
bool close_def_fields : 1;
|
1066
|
+
int def_slop;
|
1067
|
+
char *qstr;
|
1068
|
+
char *qstrp;
|
1069
|
+
char buf[CONC_WORDS][MAX_WORD_SIZE];
|
1070
|
+
int buf_index;
|
1071
|
+
HashSet *fields;
|
1072
|
+
HashSet *fields_buf;
|
1073
|
+
HashSet *def_fields;
|
1074
|
+
HashSet *all_fields;
|
1075
|
+
Analyzer *analyzer;
|
1076
|
+
Query *result;
|
1039
1077
|
} QParser;
|
1040
1078
|
|
1041
1079
|
extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
|
1042
|
-
|
1080
|
+
Analyzer *analyzer);
|
1043
1081
|
extern void qp_destroy(QParser *self);
|
1044
1082
|
extern Query *qp_parse(QParser *self, char *qstr);
|
1045
1083
|
extern char *qp_clean_str(char *str);
|
@@ -1050,26 +1088,27 @@ extern char *qp_clean_str(char *str);
|
|
1050
1088
|
*
|
1051
1089
|
***************************************************************************/
|
1052
1090
|
|
1053
|
-
typedef struct Index
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1091
|
+
typedef struct Index
|
1092
|
+
{
|
1093
|
+
mutex_t mutex;
|
1094
|
+
Store *store;
|
1095
|
+
Analyzer *analyzer;
|
1096
|
+
IndexReader *ir;
|
1097
|
+
IndexWriter *iw;
|
1098
|
+
Searcher *sea;
|
1099
|
+
QParser *qp;
|
1100
|
+
HashSet *key;
|
1101
|
+
char *id_field;
|
1102
|
+
char *def_field;
|
1103
|
+
/* for IndexWriter */
|
1104
|
+
bool use_compound_file : 1;
|
1105
|
+
bool auto_flush : 1;
|
1106
|
+
bool has_writes : 1;
|
1107
|
+
bool check_latest : 1;
|
1069
1108
|
} Index;
|
1070
1109
|
|
1071
1110
|
extern Index *index_create(Store *store, Analyzer *analyzer,
|
1072
|
-
|
1111
|
+
HashSet *def_fields, bool create);
|
1073
1112
|
extern void index_destroy(Index *self);
|
1074
1113
|
extern void index_flush(Index *self);
|
1075
1114
|
extern int index_size(Index *self);
|
@@ -1081,7 +1120,7 @@ extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
|
1081
1120
|
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1082
1121
|
extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1083
1122
|
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1084
|
-
|
1123
|
+
int num_docs, Filter *filter, Sort *sort);
|
1085
1124
|
extern Query *index_get_query(Index *self, char *qstr);
|
1086
1125
|
extern Document *index_get_doc(Index *self, int doc_num);
|
1087
1126
|
extern Document *index_get_doc_ts(Index *self, int doc_num);
|