ferret 0.9.4 → 0.9.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/Rakefile +1 -0
- data/ext/field.c +87 -87
- data/ext/index.h +253 -255
- data/ext/index_io.c +15 -6
- data/ext/index_rw.c +6 -0
- data/ext/nix_io.c +4 -6
- data/ext/q_boolean.c +0 -6
- data/ext/q_fuzzy.c +10 -7
- data/ext/q_multi_phrase.c +2 -2
- data/ext/q_term.c +2 -2
- data/ext/q_wildcard.c +5 -4
- data/ext/search.c +3 -5
- data/ext/search.h +439 -400
- data/ext/store.h +1 -0
- data/ext/termdocs.c +3 -7
- data/ext/vector.c +1 -1
- data/lib/ferret.rb +1 -1
- data/lib/ferret/store/ram_store.rb +5 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +6 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +1 -1
- metadata +4 -4
data/ext/index_io.c
CHANGED
@@ -252,14 +252,23 @@ is_read_vint(InStream *is)
|
|
252
252
|
}
|
253
253
|
|
254
254
|
inline void
|
255
|
-
|
255
|
+
is_skip_vints(InStream *is, register int cnt)
|
256
256
|
{
|
257
|
-
|
258
|
-
|
259
|
-
|
257
|
+
for (; cnt > 0; cnt--) {
|
258
|
+
while ((is_read_byte(is) & 0x80) != 0) {
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
260
262
|
|
261
|
-
|
262
|
-
|
263
|
+
inline void
|
264
|
+
is_read_chars(InStream *is, char* buffer, int off, int len)
|
265
|
+
{
|
266
|
+
int end, i;
|
267
|
+
|
268
|
+
end = off + len;
|
269
|
+
|
270
|
+
for(i = off; i < end; i++) {
|
271
|
+
buffer[i] = is_read_byte(is);
|
263
272
|
}
|
264
273
|
}
|
265
274
|
|
data/ext/index_rw.c
CHANGED
@@ -186,6 +186,8 @@ void dw_invert_doc(DocumentWriter *self, Document *doc)
|
|
186
186
|
TokenStream *stream;
|
187
187
|
Token *token;
|
188
188
|
FieldInfo *fi;
|
189
|
+
char text_buf[MAX_WORD_SIZE];
|
190
|
+
text_buf[MAX_WORD_SIZE - 1] = '\0';
|
189
191
|
|
190
192
|
DocField **fields = doc->df_arr, *field;
|
191
193
|
for (i = 0; i < dfcnt; i++) {
|
@@ -202,6 +204,10 @@ void dw_invert_doc(DocumentWriter *self, Document *doc)
|
|
202
204
|
if (!field->is_tokenized) { /* un-tokenized field */
|
203
205
|
text = field->data;
|
204
206
|
slen = (int)strlen(text);
|
207
|
+
if (slen >= MAX_WORD_SIZE) {
|
208
|
+
slen = MAX_WORD_SIZE - 1;
|
209
|
+
text = strncpy(text_buf, text, MAX_WORD_SIZE - 1);
|
210
|
+
}
|
205
211
|
if (fi->store_offset) {
|
206
212
|
dw_add_position(self, field_name, text, position,
|
207
213
|
tvoi_create(offset, offset+slen));
|
data/ext/nix_io.c
CHANGED
@@ -40,7 +40,7 @@ int fcount(char *path)
|
|
40
40
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
41
41
|
|
42
42
|
while ((de = readdir(d)) != NULL) {
|
43
|
-
if (
|
43
|
+
if (de->d_name[0] != '.') {
|
44
44
|
cnt++;
|
45
45
|
}
|
46
46
|
}
|
@@ -57,8 +57,7 @@ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
|
57
57
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
58
58
|
|
59
59
|
while ((de = readdir(d)) != NULL) {
|
60
|
-
if (
|
61
|
-
&& !file_is_lock(de->d_name)) {
|
60
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
62
61
|
func(de->d_name, arg);
|
63
62
|
}
|
64
63
|
}
|
@@ -101,8 +100,7 @@ void fs_clear(Store *store)
|
|
101
100
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
101
|
|
103
102
|
while ((de = readdir(d)) != NULL) {
|
104
|
-
if (
|
105
|
-
&& !file_is_lock(de->d_name)) {
|
103
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
106
104
|
char buf[MAX_FILE_PATH];
|
107
105
|
remove(join_path(buf, store->dir.path, de->d_name));
|
108
106
|
}
|
@@ -124,7 +122,7 @@ void fs_clear_all(Store *store)
|
|
124
122
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
125
123
|
|
126
124
|
while ((de = readdir(d)) != NULL) {
|
127
|
-
if (
|
125
|
+
if (de->d_name[0] != '.') {
|
128
126
|
char buf[MAX_FILE_PATH];
|
129
127
|
remove(join_path(buf, store->dir.path, de->d_name));
|
130
128
|
}
|
data/ext/q_boolean.c
CHANGED
@@ -731,12 +731,6 @@ void csc_init(Scorer *self, bool init_scorers)
|
|
731
731
|
csc->first_time = false;
|
732
732
|
}
|
733
733
|
|
734
|
-
void csc_add_scorer(ConjunctionScorer *csc, Scorer *scorer)
|
735
|
-
{
|
736
|
-
RECAPA(csc, ss_cnt, ss_capa, sub_scorers, Scorer *);
|
737
|
-
csc->sub_scorers[csc->ss_cnt++] = scorer;
|
738
|
-
}
|
739
|
-
|
740
734
|
float csc_score(Scorer *self)
|
741
735
|
{
|
742
736
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
data/ext/q_fuzzy.c
CHANGED
@@ -63,14 +63,15 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
63
63
|
/* Let's make sure we have enough room in our array to do the distance
|
64
64
|
* calculations. */
|
65
65
|
if (((m+1) * (n+1)) >= fuzq->da_capa) {
|
66
|
-
fuzq->da_capa = (m * (
|
66
|
+
fuzq->da_capa = ((m+1) * (n+1)) * 2;
|
67
67
|
REALLOC_N(fuzq->da, int, fuzq->da_capa);
|
68
|
+
printf("making capa %d -> %d\n", fuzq->da_capa, (int)fuzq->da);
|
68
69
|
d = fuzq->da;
|
69
70
|
}
|
70
71
|
|
71
72
|
/* init matrix d */
|
72
|
-
for (i = 0; i <= n; i++) d[i +
|
73
|
-
for (j = 0; j <= m; j++) d[0 +
|
73
|
+
for (i = 0; i <= n; i++) d[i + n * 0] = i;
|
74
|
+
for (j = 0; j <= m; j++) d[0 + n * j] = j;
|
74
75
|
|
75
76
|
/* start computing edit distance */
|
76
77
|
for (i = 1; i <= n; i++) {
|
@@ -78,11 +79,11 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
78
79
|
char s_i = text[i - 1];
|
79
80
|
for (j = 1; j <= m; j++) {
|
80
81
|
if (s_i != target[j-1]) {
|
81
|
-
d[i +
|
82
|
+
d[i + n*j] = min3(d[i-1 + n*j], d[i + n*(j-1)], d[i-1 + n*(j-1)])+1;
|
82
83
|
} else {
|
83
|
-
d[i +
|
84
|
+
d[i + n*j] = min3(d[i-1 + n*j]+1, d[i + n*(j-1)]+1, d[i-1 + n*(j-1)]);
|
84
85
|
}
|
85
|
-
best_pos_ed_dist = min2(best_pos_ed_dist, d[i +
|
86
|
+
best_pos_ed_dist = min2(best_pos_ed_dist, d[i + n*j]);
|
86
87
|
}
|
87
88
|
//printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
|
88
89
|
|
@@ -104,7 +105,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
|
|
104
105
|
* number of characters in the shorter word. but this was the formula that
|
105
106
|
* was previously used in FuzzyTermEnum, so it has not been changed (even
|
106
107
|
* though min_sim must be greater than 0.0) */
|
107
|
-
return 1.0f - ((float)d[n +
|
108
|
+
return 1.0f - ((float)d[n + n*m] / (float) (fuzq->pre_len + min2(n, m)));
|
108
109
|
}
|
109
110
|
|
110
111
|
/****************************************************************************
|
@@ -252,7 +253,9 @@ void fuzq_destroy(Query *self)
|
|
252
253
|
{
|
253
254
|
FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
|
254
255
|
if (self->destroy_all) term_destroy((Term *)fuzq->term);
|
256
|
+
printf("freeing %d -> %d\n", fuzq->da_capa, (int)fuzq->da);
|
255
257
|
free(fuzq->da);
|
258
|
+
printf("success\n");
|
256
259
|
free(fuzq);
|
257
260
|
q_destroy_i(self);
|
258
261
|
}
|
data/ext/q_multi_phrase.c
CHANGED
@@ -34,7 +34,7 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
|
|
34
34
|
tps[i] = mtdpe_create(ir, mphq->terms[i], mphq->pt_cnt[i]);
|
35
35
|
}
|
36
36
|
if (tps[i] == NULL) {
|
37
|
-
|
37
|
+
/* free everything we just created and return NULL */
|
38
38
|
int j;
|
39
39
|
for (j = 0; j < i; j++) {
|
40
40
|
tps[i]->close(tps[i]);
|
@@ -44,7 +44,7 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
|
|
44
44
|
}
|
45
45
|
}
|
46
46
|
|
47
|
-
if (mphq->slop == 0) {
|
47
|
+
if (mphq->slop == 0) { /* optimize exact case */
|
48
48
|
phsc = exact_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
|
49
49
|
self->similarity,
|
50
50
|
ir->get_norms(ir, mphq->field));
|
data/ext/q_term.c
CHANGED
@@ -212,12 +212,12 @@ bool tsc_next(Scorer *self)
|
|
212
212
|
|
213
213
|
ts->pointer++;
|
214
214
|
if (ts->pointer >= ts->pointer_max) {
|
215
|
-
|
215
|
+
/* refill buffer */
|
216
216
|
ts->pointer_max = ts->tde->read(ts->tde, ts->docs, ts->freqs, TDE_READ_SIZE);
|
217
217
|
if (ts->pointer_max != 0) {
|
218
218
|
ts->pointer = 0;
|
219
219
|
} else {
|
220
|
-
ts->tde->close(ts->tde);
|
220
|
+
ts->tde->close(ts->tde); /* close stream */
|
221
221
|
ts->tde = NULL;
|
222
222
|
return false;
|
223
223
|
}
|
data/ext/q_wildcard.c
CHANGED
@@ -77,16 +77,17 @@ Query *wcq_rewrite(Query *self, IndexReader *ir)
|
|
77
77
|
Term *term = (Term *)self->data;
|
78
78
|
char *text = term->text;
|
79
79
|
char *field = term->field;
|
80
|
-
char *first_star =
|
81
|
-
char *first_ques =
|
82
|
-
|
80
|
+
char *first_star = strchr(text, WILD_STRING);
|
81
|
+
char *first_ques = strchr(text, WILD_CHAR);
|
82
|
+
|
83
|
+
if (first_star == NULL && first_ques == NULL) {
|
83
84
|
q = tq_create(term_clone(term));
|
84
85
|
} else {
|
85
86
|
TermEnum *te;
|
86
87
|
Term prefix_term;
|
87
88
|
char *prefix = NULL;
|
88
89
|
|
89
|
-
char *pattern = (first_ques && first_star > first_ques)
|
90
|
+
char *pattern = (first_ques && (!first_star || (first_star > first_ques)))
|
90
91
|
? first_ques : first_star;
|
91
92
|
|
92
93
|
int prefix_len = (int)(pattern - text);
|
data/ext/search.c
CHANGED
@@ -562,8 +562,7 @@ static void s_search_each_w(Searcher *self, Weight *weight, Filter *filter,
|
|
562
562
|
static void s_search_each(Searcher *self, Query *query, Filter *filter,
|
563
563
|
void (*fn)(Searcher *, int, float, void *), void *arg)
|
564
564
|
{
|
565
|
-
Weight *weight;
|
566
|
-
weight = q_weight(query, self);
|
565
|
+
Weight *weight = q_weight(query, self);
|
567
566
|
s_search_each_w(self, weight, filter, fn, arg);
|
568
567
|
weight->destroy(weight);
|
569
568
|
}
|
@@ -602,8 +601,9 @@ static Similarity *s_get_similarity(Searcher *self)
|
|
602
601
|
|
603
602
|
static void s_close(Searcher *self)
|
604
603
|
{
|
605
|
-
if (self->ir && self->close_ir)
|
604
|
+
if (self->ir && self->close_ir) {
|
606
605
|
ir_close(self->ir);
|
606
|
+
}
|
607
607
|
free(self);
|
608
608
|
}
|
609
609
|
|
@@ -865,7 +865,6 @@ static void msea_search_each(Searcher *self, Query *query, Filter *filter,
|
|
865
865
|
|
866
866
|
struct MultiSearchArg {
|
867
867
|
int total_hits, max_size;
|
868
|
-
float min_score;
|
869
868
|
PriorityQueue *hq;
|
870
869
|
void (*hq_insert)(PriorityQueue *pq, Hit *hit);
|
871
870
|
};
|
@@ -919,7 +918,6 @@ static TopDocs *msea_search(Searcher *self, Query *query, int first_doc,
|
|
919
918
|
ms_arg.hq = hq;
|
920
919
|
ms_arg.total_hits = 0;
|
921
920
|
ms_arg.max_size = max_size;
|
922
|
-
ms_arg.min_score = 0.0;
|
923
921
|
ms_arg.hq_insert = hq_insert;
|
924
922
|
|
925
923
|
msea_search_each_w(self, weight, filter, msea_search_i, &ms_arg);
|
data/ext/search.h
CHANGED
@@ -10,7 +10,7 @@ typedef struct Scorer Scorer;
|
|
10
10
|
#include "similarity.h"
|
11
11
|
|
12
12
|
#define term_set_create() \
|
13
|
-
|
13
|
+
hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
|
14
14
|
|
15
15
|
/***************************************************************************
|
16
16
|
*
|
@@ -19,12 +19,13 @@ typedef struct Scorer Scorer;
|
|
19
19
|
***************************************************************************/
|
20
20
|
|
21
21
|
#define EXPLANATION_DETAILS_START_SIZE 4
|
22
|
-
typedef struct Explanation
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
typedef struct Explanation
|
23
|
+
{
|
24
|
+
float value;
|
25
|
+
char *description;
|
26
|
+
struct Explanation **details;
|
27
|
+
int dcnt;
|
28
|
+
int dcapa;
|
28
29
|
} Explanation;
|
29
30
|
|
30
31
|
extern Explanation *expl_create(float value, char *description);
|
@@ -39,23 +40,25 @@ extern char *expl_to_html(Explanation *self);
|
|
39
40
|
*
|
40
41
|
***************************************************************************/
|
41
42
|
|
42
|
-
typedef struct Hit
|
43
|
-
|
44
|
-
|
43
|
+
typedef struct Hit
|
44
|
+
{
|
45
|
+
int doc;
|
46
|
+
float score;
|
45
47
|
} Hit;
|
46
48
|
|
47
49
|
extern bool hit_less_than(void *p1, void *p2);
|
48
|
-
|
50
|
+
|
49
51
|
/***************************************************************************
|
50
52
|
*
|
51
53
|
* TopDocs
|
52
54
|
*
|
53
55
|
***************************************************************************/
|
54
56
|
|
55
|
-
typedef struct TopDocs
|
56
|
-
|
57
|
-
|
58
|
-
|
57
|
+
typedef struct TopDocs
|
58
|
+
{
|
59
|
+
int total_hits;
|
60
|
+
int size;
|
61
|
+
Hit **hits;
|
59
62
|
} TopDocs;
|
60
63
|
|
61
64
|
extern TopDocs *td_create(int total_hits, int size, Hit **hits);
|
@@ -68,15 +71,16 @@ extern char *td_to_s(TopDocs *td);
|
|
68
71
|
*
|
69
72
|
***************************************************************************/
|
70
73
|
|
71
|
-
typedef struct Filter
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
74
|
+
typedef struct Filter
|
75
|
+
{
|
76
|
+
void *data;
|
77
|
+
char *name;
|
78
|
+
HshTable *cache;
|
79
|
+
BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
|
80
|
+
char *(*to_s)(struct Filter *self);
|
81
|
+
uint (*hash)(struct Filter *self);
|
82
|
+
int (*eq)(struct Filter *self, struct Filter *o);
|
83
|
+
void (*destroy)(struct Filter *self);
|
80
84
|
} Filter;
|
81
85
|
|
82
86
|
extern Filter *filt_create(char *name);
|
@@ -93,7 +97,7 @@ extern int filt_eq(Filter *self, Filter *o);
|
|
93
97
|
***************************************************************************/
|
94
98
|
|
95
99
|
extern Filter *rfilt_create(const char *field, char *lower_term,
|
96
|
-
|
100
|
+
char *upper_term, bool include_lower, bool include_upper);
|
97
101
|
|
98
102
|
/***************************************************************************
|
99
103
|
*
|
@@ -101,8 +105,9 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
|
|
101
105
|
*
|
102
106
|
***************************************************************************/
|
103
107
|
|
104
|
-
typedef struct QueryFilter
|
105
|
-
|
108
|
+
typedef struct QueryFilter
|
109
|
+
{
|
110
|
+
Query *query;
|
106
111
|
} QueryFilter;
|
107
112
|
|
108
113
|
extern Filter *qfilt_create(Query *query);
|
@@ -113,22 +118,23 @@ extern Filter *qfilt_create(Query *query);
|
|
113
118
|
*
|
114
119
|
***************************************************************************/
|
115
120
|
|
116
|
-
struct Weight
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
121
|
+
struct Weight
|
122
|
+
{
|
123
|
+
void *data;
|
124
|
+
float value;
|
125
|
+
float qweight;
|
126
|
+
float qnorm;
|
127
|
+
float idf;
|
128
|
+
Query *query;
|
129
|
+
Similarity *similarity;
|
130
|
+
Query *(*get_query)(Weight *self);
|
131
|
+
float (*get_value)(Weight *self);
|
132
|
+
void (*normalize)(Weight *self, float normalization_factor);
|
133
|
+
Scorer *(*scorer)(Weight *self, IndexReader *ir);
|
134
|
+
Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
|
135
|
+
float (*sum_of_squared_weights)(Weight *self);
|
136
|
+
char *(*to_s)(Weight *self);
|
137
|
+
void (*destroy)(Weight *self);
|
132
138
|
};
|
133
139
|
|
134
140
|
extern Weight *w_create(Query *query);
|
@@ -154,8 +160,8 @@ extern Weight *tw_create(Query *query, Searcher *searcher);
|
|
154
160
|
***************************************************************************/
|
155
161
|
|
156
162
|
typedef struct BooleanWeight {
|
157
|
-
|
158
|
-
|
163
|
+
Weight **weights;
|
164
|
+
int w_cnt;
|
159
165
|
} BooleanWeight;
|
160
166
|
|
161
167
|
extern Weight *bw_create(Query *query, Searcher *searcher);
|
@@ -199,39 +205,40 @@ extern Weight *spanw_create(Query *query, Searcher *searcher);
|
|
199
205
|
***************************************************************************/
|
200
206
|
|
201
207
|
enum QUERY_TYPE {
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
208
|
+
TERM_QUERY,
|
209
|
+
BOOLEAN_QUERY,
|
210
|
+
PHRASE_QUERY,
|
211
|
+
MULTI_PHRASE_QUERY,
|
212
|
+
CONSTANT_QUERY,
|
213
|
+
FILTERED_QUERY,
|
214
|
+
MATCH_ALL_QUERY,
|
215
|
+
RANGE_QUERY,
|
216
|
+
WILD_CARD_QUERY,
|
217
|
+
FUZZY_QUERY,
|
218
|
+
PREFIX_QUERY,
|
219
|
+
SPAN_TERM_QUERY,
|
220
|
+
SPAN_FIRST_QUERY,
|
221
|
+
SPAN_OR_QUERY,
|
222
|
+
SPAN_NOT_QUERY,
|
223
|
+
SPAN_NEAR_QUERY
|
218
224
|
};
|
219
225
|
|
220
|
-
struct Query
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
226
|
+
struct Query
|
227
|
+
{
|
228
|
+
uchar type;
|
229
|
+
int ref_cnt;
|
230
|
+
void *data;
|
231
|
+
float boost;
|
232
|
+
Weight *weight;
|
233
|
+
Query *(*rewrite)(Query *self, IndexReader *ir);
|
234
|
+
void (*extract_terms)(Query *self, HashSet *terms);
|
235
|
+
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
236
|
+
char *(*to_s)(Query *self, char *field);
|
237
|
+
uint (*hash)(Query *self);
|
238
|
+
int (*eq)(Query *self, Query *o);
|
239
|
+
void (*destroy_i)(Query *self);
|
240
|
+
Weight *(*create_weight_i)(Query *self, Searcher *searcher);
|
241
|
+
bool destroy_all : 1;
|
235
242
|
};
|
236
243
|
|
237
244
|
/* Internal Query Functions */
|
@@ -253,8 +260,9 @@ extern int q_eq(Query *self, Query *o);
|
|
253
260
|
*
|
254
261
|
***************************************************************************/
|
255
262
|
|
256
|
-
typedef struct TermQuery
|
257
|
-
|
263
|
+
typedef struct TermQuery
|
264
|
+
{
|
265
|
+
Term *term;
|
258
266
|
} TermQuery;
|
259
267
|
|
260
268
|
extern Query *tq_create(Term *term);
|
@@ -269,19 +277,20 @@ extern Query *tq_create(Term *term);
|
|
269
277
|
* BooleanClause
|
270
278
|
***************************************************************************/
|
271
279
|
|
272
|
-
enum BC_TYPE
|
273
|
-
|
274
|
-
|
275
|
-
|
280
|
+
enum BC_TYPE
|
281
|
+
{
|
282
|
+
BC_SHOULD,
|
283
|
+
BC_MUST,
|
284
|
+
BC_MUST_NOT
|
276
285
|
};
|
277
286
|
|
278
287
|
typedef struct BooleanClause {
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
288
|
+
int ref_cnt;
|
289
|
+
Query *query;
|
290
|
+
Query *rewritten;
|
291
|
+
unsigned int occur : 4;
|
292
|
+
bool is_prohibited : 1;
|
293
|
+
bool is_required : 1;
|
285
294
|
} BooleanClause;
|
286
295
|
|
287
296
|
extern BooleanClause *bc_create(Query *query, unsigned int occur);
|
@@ -296,19 +305,20 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
|
|
296
305
|
#define BOOLEAN_CLAUSES_START_CAPA 4
|
297
306
|
#define QUERY_STRING_START_SIZE 64
|
298
307
|
|
299
|
-
typedef struct BooleanQuery
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
308
|
+
typedef struct BooleanQuery
|
309
|
+
{
|
310
|
+
bool coord_disabled;
|
311
|
+
int max_clause_cnt;
|
312
|
+
int clause_cnt;
|
313
|
+
int clause_capa;
|
314
|
+
float original_boost;
|
315
|
+
BooleanClause **clauses;
|
316
|
+
Similarity *similarity;
|
307
317
|
} BooleanQuery;
|
308
318
|
|
309
319
|
extern Query *bq_create(bool coord_disabled);
|
310
320
|
extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
311
|
-
|
321
|
+
unsigned int occur);
|
312
322
|
extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
313
323
|
|
314
324
|
/***************************************************************************
|
@@ -318,13 +328,14 @@ extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
|
318
328
|
***************************************************************************/
|
319
329
|
|
320
330
|
#define PHQ_INIT_CAPA 4
|
321
|
-
typedef struct PhraseQuery
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
331
|
+
typedef struct PhraseQuery
|
332
|
+
{
|
333
|
+
int slop;
|
334
|
+
Term **terms;
|
335
|
+
int *positions;
|
336
|
+
int t_cnt;
|
337
|
+
int t_capa;
|
338
|
+
char *field;
|
328
339
|
} PhraseQuery;
|
329
340
|
|
330
341
|
extern Query *phq_create();
|
@@ -336,14 +347,15 @@ extern void phq_add_term(Query *self, Term *term, int pos_inc);
|
|
336
347
|
*
|
337
348
|
***************************************************************************/
|
338
349
|
|
339
|
-
typedef struct MultiPhraseQuery
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
350
|
+
typedef struct MultiPhraseQuery
|
351
|
+
{
|
352
|
+
int slop;
|
353
|
+
Term ***terms;
|
354
|
+
int *positions;
|
355
|
+
int *pt_cnt;
|
356
|
+
int t_cnt;
|
357
|
+
int t_capa;
|
358
|
+
char *field;
|
347
359
|
} MultiPhraseQuery;
|
348
360
|
|
349
361
|
extern Query *mphq_create();
|
@@ -379,16 +391,17 @@ extern bool wc_match(char *pattern, char *text);
|
|
379
391
|
#define DEF_PRE_LEN 0
|
380
392
|
#define TYPICAL_LONGEST_WORD 20
|
381
393
|
|
382
|
-
typedef struct FuzzyQuery
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
394
|
+
typedef struct FuzzyQuery
|
395
|
+
{
|
396
|
+
Term *term;
|
397
|
+
char *text; /* term text after prefix */
|
398
|
+
int text_len;
|
399
|
+
int pre_len;
|
400
|
+
float min_sim;
|
401
|
+
float scale_factor;
|
402
|
+
int max_distances[TYPICAL_LONGEST_WORD];
|
403
|
+
int *da;
|
404
|
+
int da_capa;
|
392
405
|
} FuzzyQuery;
|
393
406
|
|
394
407
|
extern Query *fuzq_create(Term *term);
|
@@ -408,6 +421,12 @@ extern Query *csq_create(Filter *filter);
|
|
408
421
|
*
|
409
422
|
***************************************************************************/
|
410
423
|
|
424
|
+
typedef struct FilteredQuery
|
425
|
+
{
|
426
|
+
Query *query;
|
427
|
+
Filter *filter;
|
428
|
+
} FilteredQuery;
|
429
|
+
|
411
430
|
extern Query *fq_create(Query *query, Filter *filter);
|
412
431
|
|
413
432
|
/***************************************************************************
|
@@ -424,31 +443,21 @@ extern Query *maq_create();
|
|
424
443
|
*
|
425
444
|
***************************************************************************/
|
426
445
|
|
427
|
-
typedef struct Range
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
446
|
+
typedef struct Range
|
447
|
+
{
|
448
|
+
char *field;
|
449
|
+
char *lower_term;
|
450
|
+
char *upper_term;
|
451
|
+
bool include_lower : 1;
|
452
|
+
bool include_upper : 1;
|
433
453
|
} Range;
|
434
454
|
|
435
455
|
extern Query *rq_create(const char *field, char *lower_term,
|
436
|
-
|
456
|
+
char *upper_term, bool include_lower, bool include_upper);
|
437
457
|
extern Query *rq_create_less(const char *field, char *upper_term,
|
438
|
-
|
458
|
+
bool include_upper);
|
439
459
|
extern Query *rq_create_more(const char *field, char *lower_term,
|
440
|
-
|
441
|
-
|
442
|
-
/***************************************************************************
|
443
|
-
*
|
444
|
-
* FilteredQuery
|
445
|
-
*
|
446
|
-
***************************************************************************/
|
447
|
-
|
448
|
-
typedef struct FilteredQuery {
|
449
|
-
Query *query;
|
450
|
-
Filter *filter;
|
451
|
-
} FilteredQuery;
|
460
|
+
bool include_lower);
|
452
461
|
|
453
462
|
/***************************************************************************
|
454
463
|
*
|
@@ -461,16 +470,17 @@ typedef struct FilteredQuery {
|
|
461
470
|
***************************************************************************/
|
462
471
|
|
463
472
|
typedef struct SpanEnum SpanEnum;
|
464
|
-
struct SpanEnum
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
473
|
+
struct SpanEnum
|
474
|
+
{
|
475
|
+
void *data;
|
476
|
+
Query *query;
|
477
|
+
bool (*next)(SpanEnum *self);
|
478
|
+
bool (*skip_to)(SpanEnum *self, int target_doc);
|
479
|
+
int (*doc)(SpanEnum *self);
|
480
|
+
int (*start)(SpanEnum *self);
|
481
|
+
int (*end)(SpanEnum *self);
|
482
|
+
char *(*to_s)(SpanEnum *self);
|
483
|
+
void (*destroy)(SpanEnum *self);
|
474
484
|
};
|
475
485
|
|
476
486
|
/***************************************************************************
|
@@ -478,12 +488,13 @@ struct SpanEnum {
|
|
478
488
|
***************************************************************************/
|
479
489
|
|
480
490
|
typedef struct SpanTermEnum SpanTermEnum;
|
481
|
-
struct SpanTermEnum
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
491
|
+
struct SpanTermEnum
|
492
|
+
{
|
493
|
+
TermDocEnum *positions;
|
494
|
+
int position;
|
495
|
+
int doc;
|
496
|
+
int count;
|
497
|
+
int freq;
|
487
498
|
};
|
488
499
|
|
489
500
|
extern SpanEnum *spante_create(Query *query, IndexReader *ir);
|
@@ -498,11 +509,12 @@ extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
|
498
509
|
* SpanOrEnum
|
499
510
|
***************************************************************************/
|
500
511
|
|
501
|
-
typedef struct SpanOrEnum
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
512
|
+
typedef struct SpanOrEnum
|
513
|
+
{
|
514
|
+
PriorityQueue *queue;
|
515
|
+
SpanEnum **span_enums;
|
516
|
+
int s_cnt;
|
517
|
+
bool first_time;
|
506
518
|
} SpanOrEnum;
|
507
519
|
|
508
520
|
extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
@@ -511,11 +523,12 @@ extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
|
511
523
|
* SpanEnumCell
|
512
524
|
***************************************************************************/
|
513
525
|
|
514
|
-
typedef struct SpanEnumCell
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
526
|
+
typedef struct SpanEnumCell
|
527
|
+
{
|
528
|
+
SpanEnum *parent;
|
529
|
+
SpanEnum *se;
|
530
|
+
int index;
|
531
|
+
int length;
|
519
532
|
} SpanEnumCell;
|
520
533
|
|
521
534
|
extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
@@ -524,16 +537,17 @@ extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
|
524
537
|
* SpanNearEnum
|
525
538
|
***************************************************************************/
|
526
539
|
|
527
|
-
typedef struct SpanNearEnum
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
540
|
+
typedef struct SpanNearEnum
|
541
|
+
{
|
542
|
+
SpanEnum **span_enums;
|
543
|
+
int s_cnt;
|
544
|
+
int slop;
|
545
|
+
int current;
|
546
|
+
bool first_time : 1;
|
547
|
+
bool in_order : 1;
|
548
|
+
int doc;
|
549
|
+
int start;
|
550
|
+
int end;
|
537
551
|
} SpanNearEnum;
|
538
552
|
|
539
553
|
extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
@@ -542,11 +556,12 @@ extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
|
542
556
|
* SpanNotEnum
|
543
557
|
***************************************************************************/
|
544
558
|
|
545
|
-
typedef struct SpanNotEnum
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
559
|
+
typedef struct SpanNotEnum
|
560
|
+
{
|
561
|
+
SpanEnum *inc;
|
562
|
+
SpanEnum *exc;
|
563
|
+
bool more_inc : 1;
|
564
|
+
bool more_exc : 1;
|
550
565
|
} SpanNotEnum;
|
551
566
|
|
552
567
|
extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
@@ -556,11 +571,12 @@ extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
|
556
571
|
***************************************************************************/
|
557
572
|
|
558
573
|
typedef struct SpanQuery SpanQuery;
|
559
|
-
struct SpanQuery
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
574
|
+
struct SpanQuery
|
575
|
+
{
|
576
|
+
void *data;
|
577
|
+
char *field;
|
578
|
+
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
579
|
+
HashSet *(*get_terms)(Query *self);
|
564
580
|
};
|
565
581
|
|
566
582
|
/***************************************************************************
|
@@ -573,9 +589,10 @@ extern Query *spantq_create(Term *term);
|
|
573
589
|
* SpanFirstQuery
|
574
590
|
***************************************************************************/
|
575
591
|
|
576
|
-
typedef struct SpanFirstQuery
|
577
|
-
|
578
|
-
|
592
|
+
typedef struct SpanFirstQuery
|
593
|
+
{
|
594
|
+
int end;
|
595
|
+
Query *match;
|
579
596
|
} SpanFirstQuery;
|
580
597
|
|
581
598
|
extern Query *spanfq_create(Query *match, int end);
|
@@ -584,9 +601,10 @@ extern Query *spanfq_create(Query *match, int end);
|
|
584
601
|
* SpanOrQuery
|
585
602
|
***************************************************************************/
|
586
603
|
|
587
|
-
typedef struct SpanOrQuery
|
588
|
-
|
589
|
-
|
604
|
+
typedef struct SpanOrQuery
|
605
|
+
{
|
606
|
+
Query **clauses;
|
607
|
+
int c_cnt;
|
590
608
|
} SpanOrQuery;
|
591
609
|
|
592
610
|
extern Query *spanoq_create(Query **clauses, int c_cnt);
|
@@ -595,24 +613,26 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
|
|
595
613
|
* SpanNearQuery
|
596
614
|
***************************************************************************/
|
597
615
|
|
598
|
-
typedef struct SpanNearQuery
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
616
|
+
typedef struct SpanNearQuery
|
617
|
+
{
|
618
|
+
Query **clauses;
|
619
|
+
int c_cnt;
|
620
|
+
int slop;
|
621
|
+
bool in_order;
|
603
622
|
} SpanNearQuery;
|
604
623
|
|
605
624
|
extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
606
|
-
|
625
|
+
bool in_order);
|
607
626
|
|
608
627
|
|
609
628
|
/***************************************************************************
|
610
629
|
* SpanNotQuery
|
611
630
|
***************************************************************************/
|
612
631
|
|
613
|
-
typedef struct SpanNotQuery
|
614
|
-
|
615
|
-
|
632
|
+
typedef struct SpanNotQuery
|
633
|
+
{
|
634
|
+
Query *inc;
|
635
|
+
Query *exc;
|
616
636
|
} SpanNotQuery;
|
617
637
|
|
618
638
|
extern Query *spanxq_create(Query *inc, Query *exc);
|
@@ -625,15 +645,16 @@ extern Query *spanxq_create(Query *inc, Query *exc);
|
|
625
645
|
|
626
646
|
#define SCORER_NULLIFY(mscorer) mscorer->destroy(mscorer); mscorer = NULL
|
627
647
|
|
628
|
-
struct Scorer
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
648
|
+
struct Scorer
|
649
|
+
{
|
650
|
+
void *data;
|
651
|
+
Similarity *similarity;
|
652
|
+
int doc;
|
653
|
+
float (*score)(Scorer *self);
|
654
|
+
bool (*next)(Scorer *self);
|
655
|
+
bool (*skip_to)(Scorer *self, int doc_num);
|
656
|
+
Explanation *(*explain)(Scorer *self, int doc_num);
|
657
|
+
void (*destroy)(Scorer *self);
|
637
658
|
};
|
638
659
|
|
639
660
|
/* Internal Scorer Function */
|
@@ -653,16 +674,17 @@ extern int scorer_doc_cmp(const void *p1, const void *p2);
|
|
653
674
|
#define SCORE_CACHE_SIZE 32
|
654
675
|
#define TDE_READ_SIZE 32
|
655
676
|
|
656
|
-
typedef struct TermScorer
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
677
|
+
typedef struct TermScorer
|
678
|
+
{
|
679
|
+
int docs[TDE_READ_SIZE];
|
680
|
+
int freqs[TDE_READ_SIZE];
|
681
|
+
int pointer;
|
682
|
+
int pointer_max;
|
683
|
+
float score_cache[SCORE_CACHE_SIZE];
|
684
|
+
Weight *weight;
|
685
|
+
TermDocEnum *tde;
|
686
|
+
uchar *norms;
|
687
|
+
float weight_value;
|
666
688
|
} TermScorer;
|
667
689
|
|
668
690
|
extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
@@ -677,89 +699,96 @@ extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
|
677
699
|
* Coordinator
|
678
700
|
***************************************************************************/
|
679
701
|
|
680
|
-
typedef struct Coordinator
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
702
|
+
typedef struct Coordinator
|
703
|
+
{
|
704
|
+
int max_coord;
|
705
|
+
float *coord_factors;
|
706
|
+
Similarity *similarity;
|
707
|
+
int num_matches;
|
685
708
|
} Coordinator;
|
686
709
|
|
687
710
|
/***************************************************************************
|
688
711
|
* DisjunctionSumScorer
|
689
712
|
***************************************************************************/
|
690
713
|
|
691
|
-
typedef struct DisjunctionSumScorer
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
714
|
+
typedef struct DisjunctionSumScorer
|
715
|
+
{
|
716
|
+
float cum_score;
|
717
|
+
int num_matches;
|
718
|
+
int min_num_matches;
|
719
|
+
Scorer **sub_scorers;
|
720
|
+
int ss_cnt;
|
721
|
+
PriorityQueue *scorer_queue;
|
722
|
+
Coordinator *coordinator;
|
699
723
|
} DisjunctionSumScorer;
|
700
724
|
|
701
725
|
/***************************************************************************
|
702
726
|
* ConjunctionScorer
|
703
727
|
***************************************************************************/
|
704
728
|
|
705
|
-
typedef struct ConjunctionScorer
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
729
|
+
typedef struct ConjunctionScorer
|
730
|
+
{
|
731
|
+
bool first_time : 1;
|
732
|
+
bool more : 1;
|
733
|
+
float coord;
|
734
|
+
int ss_cnt;
|
735
|
+
int ss_capa;
|
736
|
+
Scorer **sub_scorers;
|
737
|
+
int first;
|
738
|
+
int last;
|
739
|
+
Coordinator *coordinator;
|
740
|
+
int last_scored_doc;
|
716
741
|
} ConjunctionScorer;
|
717
742
|
|
718
743
|
/***************************************************************************
|
719
744
|
* SingleMatchScorer
|
720
745
|
***************************************************************************/
|
721
746
|
|
722
|
-
typedef struct SingleMatchScorer
|
723
|
-
|
724
|
-
|
747
|
+
typedef struct SingleMatchScorer
|
748
|
+
{
|
749
|
+
Coordinator *coordinator;
|
750
|
+
Scorer *scorer;
|
725
751
|
} SingleMatchScorer;
|
726
752
|
|
727
753
|
/***************************************************************************
|
728
754
|
* ReqOptSumScorer
|
729
755
|
***************************************************************************/
|
730
756
|
|
731
|
-
typedef struct ReqOptSumScorer
|
732
|
-
|
733
|
-
|
734
|
-
|
757
|
+
typedef struct ReqOptSumScorer
|
758
|
+
{
|
759
|
+
Scorer *req_scorer;
|
760
|
+
Scorer *opt_scorer;
|
761
|
+
bool first_time_opt;
|
735
762
|
} ReqOptSumScorer;
|
736
763
|
|
737
764
|
/***************************************************************************
|
738
765
|
* ReqExclScorer
|
739
766
|
***************************************************************************/
|
740
767
|
|
741
|
-
typedef struct ReqExclScorer
|
742
|
-
|
743
|
-
|
744
|
-
|
768
|
+
typedef struct ReqExclScorer
|
769
|
+
{
|
770
|
+
Scorer *req_scorer;
|
771
|
+
Scorer *excl_scorer;
|
772
|
+
bool first_time;
|
745
773
|
} ReqExclScorer;
|
746
774
|
|
747
775
|
/***************************************************************************
|
748
776
|
* BooleanScorer
|
749
777
|
***************************************************************************/
|
750
778
|
|
751
|
-
typedef struct BooleanScorer
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
779
|
+
typedef struct BooleanScorer
|
780
|
+
{
|
781
|
+
Scorer **required_scorers;
|
782
|
+
int rs_cnt;
|
783
|
+
int rs_capa;
|
784
|
+
Scorer **optional_scorers;
|
785
|
+
int os_cnt;
|
786
|
+
int os_capa;
|
787
|
+
Scorer **prohibited_scorers;
|
788
|
+
int ps_cnt;
|
789
|
+
int ps_capa;
|
790
|
+
Scorer *counting_sum_scorer;
|
791
|
+
Coordinator *coordinator;
|
763
792
|
} BooleanScorer;
|
764
793
|
|
765
794
|
extern Scorer *bsc_create(Similarity *similarity);
|
@@ -774,12 +803,13 @@ extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
|
774
803
|
/***************************************************************************
|
775
804
|
* PhrasePosition
|
776
805
|
***************************************************************************/
|
777
|
-
typedef struct PhrasePosition
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
806
|
+
typedef struct PhrasePosition
|
807
|
+
{
|
808
|
+
TermDocEnum *tpe;
|
809
|
+
int offset;
|
810
|
+
int count;
|
811
|
+
int doc;
|
812
|
+
int position;
|
783
813
|
} PhrasePosition;
|
784
814
|
|
785
815
|
extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
@@ -788,39 +818,40 @@ extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
|
788
818
|
* PhraseScorer
|
789
819
|
***************************************************************************/
|
790
820
|
|
791
|
-
typedef struct PhraseScorer
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
821
|
+
typedef struct PhraseScorer
|
822
|
+
{
|
823
|
+
float freq;
|
824
|
+
uchar *norms;
|
825
|
+
float value;
|
826
|
+
Weight *weight;
|
827
|
+
bool first_time : 1;
|
828
|
+
bool more : 1;
|
829
|
+
int pp_first;
|
830
|
+
int pp_last;
|
831
|
+
int pp_cnt;
|
832
|
+
PhrasePosition **phrase_pos;
|
833
|
+
float (*phrase_freq)(Scorer *self);
|
834
|
+
int slop;
|
804
835
|
} PhraseScorer;
|
805
836
|
|
806
837
|
extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
807
|
-
|
838
|
+
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
808
839
|
|
809
840
|
/***************************************************************************
|
810
841
|
* ExactPhraseScorer
|
811
842
|
***************************************************************************/
|
812
843
|
|
813
844
|
extern Scorer *exact_phrase_scorer_create(Weight *weight,
|
814
|
-
|
815
|
-
|
845
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
846
|
+
Similarity *similarity, uchar *norms);
|
816
847
|
|
817
848
|
/***************************************************************************
|
818
849
|
* SloppyPhraseScorer
|
819
850
|
***************************************************************************/
|
820
851
|
|
821
852
|
extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
822
|
-
|
823
|
-
|
853
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
854
|
+
Similarity *similarity, int slop, uchar *norms);
|
824
855
|
|
825
856
|
/***************************************************************************
|
826
857
|
*
|
@@ -828,9 +859,10 @@ extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
|
828
859
|
*
|
829
860
|
***************************************************************************/
|
830
861
|
|
831
|
-
typedef struct ConstantScoreScorer
|
832
|
-
|
833
|
-
|
862
|
+
typedef struct ConstantScoreScorer
|
863
|
+
{
|
864
|
+
BitVector *bv;
|
865
|
+
float score;
|
834
866
|
} ConstantScoreScorer;
|
835
867
|
|
836
868
|
extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
@@ -842,10 +874,11 @@ extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
|
842
874
|
*
|
843
875
|
***************************************************************************/
|
844
876
|
|
845
|
-
typedef struct MatchAllScorer
|
846
|
-
|
847
|
-
|
848
|
-
|
877
|
+
typedef struct MatchAllScorer
|
878
|
+
{
|
879
|
+
IndexReader *ir;
|
880
|
+
int max_doc;
|
881
|
+
float score;
|
849
882
|
} MatchAllScorer;
|
850
883
|
|
851
884
|
extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
@@ -857,16 +890,17 @@ extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
|
857
890
|
*
|
858
891
|
***************************************************************************/
|
859
892
|
|
860
|
-
typedef struct SpanScorer
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
893
|
+
typedef struct SpanScorer
|
894
|
+
{
|
895
|
+
bool first_time : 1;
|
896
|
+
bool more : 1;
|
897
|
+
IndexReader *ir;
|
898
|
+
SpanEnum *spans;
|
899
|
+
Similarity *sim;
|
900
|
+
uchar *norms;
|
901
|
+
Weight *weight;
|
902
|
+
float value;
|
903
|
+
float freq;
|
870
904
|
} SpanScorer;
|
871
905
|
|
872
906
|
extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
@@ -878,28 +912,29 @@ extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
|
878
912
|
***************************************************************************/
|
879
913
|
|
880
914
|
enum SORT_TYPE {
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
915
|
+
SORT_TYPE_SCORE,
|
916
|
+
SORT_TYPE_DOC,
|
917
|
+
SORT_TYPE_INTEGER,
|
918
|
+
SORT_TYPE_FLOAT,
|
919
|
+
SORT_TYPE_STRING,
|
920
|
+
SORT_TYPE_AUTO
|
887
921
|
};
|
888
922
|
|
889
923
|
/***************************************************************************
|
890
924
|
* SortField
|
891
925
|
***************************************************************************/
|
892
926
|
|
893
|
-
typedef struct SortField
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
927
|
+
typedef struct SortField
|
928
|
+
{
|
929
|
+
mutex_t mutex;
|
930
|
+
char *field;
|
931
|
+
int type;
|
932
|
+
bool reverse : 1;
|
933
|
+
void *index;
|
934
|
+
int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
|
935
|
+
void *(*create_index)(int size);
|
936
|
+
void (*destroy_index)(void *p);
|
937
|
+
void (*handle_term)(void *index, TermDocEnum *tde, char *text);
|
903
938
|
} SortField;
|
904
939
|
|
905
940
|
extern SortField *sort_field_create(char *field, int type, bool reverse);
|
@@ -921,11 +956,12 @@ extern SortField SORT_FIELD_DOC_REV;
|
|
921
956
|
* Sort
|
922
957
|
***************************************************************************/
|
923
958
|
|
924
|
-
typedef struct Sort
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
959
|
+
typedef struct Sort
|
960
|
+
{
|
961
|
+
SortField **sort_fields;
|
962
|
+
int sf_cnt;
|
963
|
+
int sf_capa;
|
964
|
+
bool destroy_all : 1;
|
929
965
|
} Sort;
|
930
966
|
|
931
967
|
extern Sort *sort_create();
|
@@ -951,27 +987,27 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
951
987
|
***************************************************************************/
|
952
988
|
|
953
989
|
struct Searcher {
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
990
|
+
void *data;
|
991
|
+
IndexReader *ir;
|
992
|
+
Similarity *similarity;
|
993
|
+
bool close_ir : 1;
|
994
|
+
int (*doc_freq)(Searcher *self, Term *term);
|
995
|
+
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
|
996
|
+
Document *(*get_doc)(Searcher *self, int doc_num);
|
997
|
+
int (*max_doc)(Searcher *self);
|
998
|
+
Weight *(*create_weight)(Searcher *self, Query *query);
|
999
|
+
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
1000
|
+
int num_docs, Filter *filter, Sort *sort);
|
1001
|
+
void (*search_each)(Searcher *self, Query *query, Filter *filter,
|
1002
|
+
void (*fn)(Searcher *, int, float, void *), void *arg);
|
1003
|
+
void (*search_each_w)(Searcher *self, Weight *weight,
|
1004
|
+
Filter *filter, void (*fn)(Searcher *, int, float, void *),
|
1005
|
+
void *arg);
|
1006
|
+
Query *(*rewrite)(Searcher *self, Query *original);
|
1007
|
+
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
1008
|
+
Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
|
1009
|
+
Similarity *(*get_similarity)(Searcher *self);
|
1010
|
+
void (*close)(Searcher *self);
|
975
1011
|
};
|
976
1012
|
|
977
1013
|
#define sea_doc_freq(s, t) s->doc_freq(s, t)
|
@@ -979,11 +1015,11 @@ struct Searcher {
|
|
979
1015
|
#define sea_get_doc(s, dn) s->get_doc(s, dn)
|
980
1016
|
#define sea_max_doc(s) s->max_doc(s)
|
981
1017
|
#define sea_search(s, q, fd, nd, filt, sort)\
|
982
|
-
|
1018
|
+
s->search(s, q, fd, nd, filt, sort)
|
983
1019
|
#define sea_search_each(s, q, filt, fn, arg)\
|
984
|
-
|
1020
|
+
s->search_each(s, q, filt, fn, arg)
|
985
1021
|
#define sea_search_each_w(s, q, filt, fn, arg)\
|
986
|
-
|
1022
|
+
s->search_each_w(s, q, filt, fn, arg)
|
987
1023
|
#define sea_rewrite(s, q) s->rewrite(s, q)
|
988
1024
|
#define sea_explain(s, q, dn) s->explain(s, q, dn)
|
989
1025
|
#define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
|
@@ -998,16 +1034,17 @@ extern Searcher *sea_create(IndexReader *ir);
|
|
998
1034
|
*
|
999
1035
|
***************************************************************************/
|
1000
1036
|
|
1001
|
-
typedef struct MultiSearcher
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1037
|
+
typedef struct MultiSearcher
|
1038
|
+
{
|
1039
|
+
int s_cnt;
|
1040
|
+
Searcher **searchers;
|
1041
|
+
int *starts;
|
1042
|
+
int max_doc;
|
1043
|
+
bool close_subs : 1;
|
1007
1044
|
} MultiSearcher;
|
1008
1045
|
|
1009
1046
|
extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
1010
|
-
|
1047
|
+
bool close_subs);
|
1011
1048
|
|
1012
1049
|
/***************************************************************************
|
1013
1050
|
*
|
@@ -1017,29 +1054,30 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
|
1017
1054
|
|
1018
1055
|
#define CONC_WORDS 2
|
1019
1056
|
|
1020
|
-
typedef struct QParser
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1057
|
+
typedef struct QParser
|
1058
|
+
{
|
1059
|
+
mutex_t mutex;
|
1060
|
+
bool or_default : 1;
|
1061
|
+
bool wild_lower : 1;
|
1062
|
+
bool clean_str : 1;
|
1063
|
+
bool handle_parse_errors : 1;
|
1064
|
+
bool allow_any_fields : 1;
|
1065
|
+
bool close_def_fields : 1;
|
1066
|
+
int def_slop;
|
1067
|
+
char *qstr;
|
1068
|
+
char *qstrp;
|
1069
|
+
char buf[CONC_WORDS][MAX_WORD_SIZE];
|
1070
|
+
int buf_index;
|
1071
|
+
HashSet *fields;
|
1072
|
+
HashSet *fields_buf;
|
1073
|
+
HashSet *def_fields;
|
1074
|
+
HashSet *all_fields;
|
1075
|
+
Analyzer *analyzer;
|
1076
|
+
Query *result;
|
1039
1077
|
} QParser;
|
1040
1078
|
|
1041
1079
|
extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
|
1042
|
-
|
1080
|
+
Analyzer *analyzer);
|
1043
1081
|
extern void qp_destroy(QParser *self);
|
1044
1082
|
extern Query *qp_parse(QParser *self, char *qstr);
|
1045
1083
|
extern char *qp_clean_str(char *str);
|
@@ -1050,26 +1088,27 @@ extern char *qp_clean_str(char *str);
|
|
1050
1088
|
*
|
1051
1089
|
***************************************************************************/
|
1052
1090
|
|
1053
|
-
typedef struct Index
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1091
|
+
typedef struct Index
|
1092
|
+
{
|
1093
|
+
mutex_t mutex;
|
1094
|
+
Store *store;
|
1095
|
+
Analyzer *analyzer;
|
1096
|
+
IndexReader *ir;
|
1097
|
+
IndexWriter *iw;
|
1098
|
+
Searcher *sea;
|
1099
|
+
QParser *qp;
|
1100
|
+
HashSet *key;
|
1101
|
+
char *id_field;
|
1102
|
+
char *def_field;
|
1103
|
+
/* for IndexWriter */
|
1104
|
+
bool use_compound_file : 1;
|
1105
|
+
bool auto_flush : 1;
|
1106
|
+
bool has_writes : 1;
|
1107
|
+
bool check_latest : 1;
|
1069
1108
|
} Index;
|
1070
1109
|
|
1071
1110
|
extern Index *index_create(Store *store, Analyzer *analyzer,
|
1072
|
-
|
1111
|
+
HashSet *def_fields, bool create);
|
1073
1112
|
extern void index_destroy(Index *self);
|
1074
1113
|
extern void index_flush(Index *self);
|
1075
1114
|
extern int index_size(Index *self);
|
@@ -1081,7 +1120,7 @@ extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
|
1081
1120
|
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1082
1121
|
extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1083
1122
|
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1084
|
-
|
1123
|
+
int num_docs, Filter *filter, Sort *sort);
|
1085
1124
|
extern Query *index_get_query(Index *self, char *qstr);
|
1086
1125
|
extern Document *index_get_doc(Index *self, int doc_num);
|
1087
1126
|
extern Document *index_get_doc_ts(Index *self, int doc_num);
|