ferret 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_phrase.c
CHANGED
@@ -14,9 +14,13 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
14
14
|
Scorer *phsc;
|
15
15
|
PhraseQuery *phq = (PhraseQuery *)self->query->data;
|
16
16
|
int i;
|
17
|
-
|
17
|
+
TermDocEnum **tps;
|
18
|
+
|
19
|
+
if (phq->t_cnt == 0) {
|
20
|
+
return NULL; /* optimize zero-term case */
|
21
|
+
}
|
18
22
|
|
19
|
-
|
23
|
+
tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
|
20
24
|
|
21
25
|
for (i = 0; i < phq->t_cnt; i++) {
|
22
26
|
tps[i] = ir_term_positions_for(ir, phq->terms[i]);
|
@@ -47,6 +51,17 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
47
51
|
|
48
52
|
Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
49
53
|
{
|
54
|
+
Explanation *idf_expl1;
|
55
|
+
Explanation *idf_expl2;
|
56
|
+
Explanation *query_expl;
|
57
|
+
Explanation *qnorm_expl;
|
58
|
+
Explanation *field_expl;
|
59
|
+
Explanation *tf_expl;
|
60
|
+
Scorer *scorer;
|
61
|
+
uchar *field_norms;
|
62
|
+
float field_norm;
|
63
|
+
Explanation *field_norm_expl;
|
64
|
+
|
50
65
|
char *query_str = self->query->to_s(self->query, "");
|
51
66
|
PhraseQuery *phq = (PhraseQuery *)self->query->data;
|
52
67
|
int i;
|
@@ -57,25 +72,25 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
57
72
|
strfmt("weight(%s in %d), product of:", query_str, doc_num));
|
58
73
|
|
59
74
|
for (i = 0; i < phq->t_cnt; i++) {
|
60
|
-
len += strlen(phq->terms[i]->text) + 30;
|
75
|
+
len += (int)strlen(phq->terms[i]->text) + 30;
|
61
76
|
}
|
62
77
|
doc_freqs = ALLOC_N(char, len);
|
63
78
|
for (i = 0; i < phq->t_cnt; i++) {
|
64
79
|
Term *term = phq->terms[i];
|
65
80
|
sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
|
66
|
-
pos += strlen(doc_freqs + pos);
|
81
|
+
pos += (int)strlen(doc_freqs + pos);
|
67
82
|
}
|
68
83
|
pos -= 2; // remove ", " from the end
|
69
84
|
doc_freqs[pos] = 0;
|
70
85
|
|
71
|
-
|
86
|
+
idf_expl1 = expl_create(self->idf,
|
72
87
|
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
73
|
-
|
88
|
+
idf_expl2 = expl_create(self->idf,
|
74
89
|
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
75
90
|
free(doc_freqs);
|
76
91
|
|
77
|
-
|
78
|
-
|
92
|
+
/* explain query weight */
|
93
|
+
query_expl = expl_create(0.0,
|
79
94
|
strfmt("query_weight(%s), product of:", query_str));
|
80
95
|
|
81
96
|
if (self->query->boost != 1.0) {
|
@@ -83,36 +98,36 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
83
98
|
}
|
84
99
|
expl_add_detail(query_expl, idf_expl1);
|
85
100
|
|
86
|
-
|
101
|
+
qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
|
87
102
|
expl_add_detail(query_expl, qnorm_expl);
|
88
103
|
|
89
104
|
query_expl->value = self->query->boost * self->idf * self->qnorm;
|
90
105
|
|
91
106
|
expl_add_detail(expl, query_expl);
|
92
107
|
|
93
|
-
|
94
|
-
|
108
|
+
/* explain field weight */
|
109
|
+
field_expl = expl_create(0.0,
|
95
110
|
strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
|
96
111
|
free(query_str);
|
97
112
|
|
98
|
-
|
99
|
-
|
113
|
+
scorer = self->scorer(self, ir);
|
114
|
+
tf_expl = scorer->explain(scorer, doc_num);
|
100
115
|
scorer->destroy(scorer);
|
101
116
|
expl_add_detail(field_expl, tf_expl);
|
102
117
|
expl_add_detail(field_expl, idf_expl2);
|
103
118
|
|
104
|
-
|
105
|
-
|
119
|
+
field_norms = ir->get_norms(ir, phq->field);
|
120
|
+
field_norm = (field_norms != NULL)
|
106
121
|
? sim_decode_norm(self->similarity, field_norms[doc_num])
|
107
|
-
: 0.0;
|
108
|
-
|
122
|
+
: (float)0.0;
|
123
|
+
field_norm_expl = expl_create(field_norm,
|
109
124
|
strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
|
110
125
|
|
111
126
|
expl_add_detail(field_expl, field_norm_expl);
|
112
127
|
|
113
128
|
field_expl->value = tf_expl->value * self->idf * field_norm;
|
114
129
|
|
115
|
-
|
130
|
+
/* combine them */
|
116
131
|
if (query_expl->value == 1.0) {
|
117
132
|
expl_destoy(expl);
|
118
133
|
return field_expl;
|
@@ -130,20 +145,15 @@ char *phw_to_s(Weight *self)
|
|
130
145
|
|
131
146
|
Weight *phw_create(Query *query, Searcher *searcher)
|
132
147
|
{
|
148
|
+
Weight *self = w_create(query);
|
133
149
|
PhraseQuery *phq = (PhraseQuery *)query->data;
|
134
|
-
|
135
|
-
ZEROSET(self, Weight, 1);
|
136
|
-
self->get_query = &w_get_query;
|
137
|
-
self->get_value = &w_get_value;
|
138
|
-
self->normalize = &w_normalize;
|
150
|
+
|
139
151
|
self->scorer = &phw_scorer;
|
140
152
|
self->explain = &phw_explain;
|
141
153
|
self->to_s = &phw_to_s;
|
142
|
-
self->destroy = &free;
|
143
154
|
self->sum_of_squared_weights = &w_sum_of_squared_weights;
|
144
155
|
|
145
156
|
self->similarity = query->get_similarity(query, searcher);
|
146
|
-
self->query = query;
|
147
157
|
self->value = query->boost;
|
148
158
|
self->idf = sim_idf_phrase(self->similarity, phq->terms, phq->t_cnt, searcher);
|
149
159
|
|
@@ -158,12 +168,12 @@ Weight *phw_create(Query *query, Searcher *searcher)
|
|
158
168
|
|
159
169
|
#define GET_PHQ PhraseQuery *phq = (PhraseQuery *)self->data
|
160
170
|
|
161
|
-
void phq_extract_terms(Query *self,
|
171
|
+
void phq_extract_terms(Query *self, HashSet *terms)
|
162
172
|
{
|
163
173
|
GET_PHQ;
|
164
174
|
int i;
|
165
175
|
for (i = 0; i < phq->t_cnt; i++) {
|
166
|
-
|
176
|
+
hs_add(terms, term_clone(phq->terms[i]));
|
167
177
|
}
|
168
178
|
}
|
169
179
|
|
@@ -173,9 +183,9 @@ char *phq_to_s(Query *self, char *field)
|
|
173
183
|
int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
|
174
184
|
char *buffer;
|
175
185
|
if (!phq->t_cnt) return NULL;
|
176
|
-
len = strlen(phq->field) + 1;
|
186
|
+
len = (int)strlen(phq->field) + 1;
|
177
187
|
for (i = 0; i < phq->t_cnt; i++) {
|
178
|
-
len += strlen(phq->terms[i]->text) + 1;
|
188
|
+
len += (int)strlen(phq->terms[i]->text) + 1;
|
179
189
|
}
|
180
190
|
// add space for extra characters and boost and slop
|
181
191
|
len += 100 + 3 * phq->positions[phq->t_cnt - 1];
|
@@ -183,7 +193,7 @@ char *phq_to_s(Query *self, char *field)
|
|
183
193
|
buffer = ALLOC_N(char, len);
|
184
194
|
|
185
195
|
if (strcmp(field, phq->field) != 0) {
|
186
|
-
len = strlen(phq->field);
|
196
|
+
len = (int)strlen(phq->field);
|
187
197
|
memcpy(buffer, phq->field, len);
|
188
198
|
buffer[len] = ':';
|
189
199
|
buf_index += len + 1;
|
@@ -199,7 +209,7 @@ char *phq_to_s(Query *self, char *field)
|
|
199
209
|
}
|
200
210
|
last_pos = pos;
|
201
211
|
|
202
|
-
len = strlen(term->text);
|
212
|
+
len = (int)strlen(term->text);
|
203
213
|
memcpy(buffer + buf_index, term->text, len);
|
204
214
|
buf_index += len;
|
205
215
|
buffer[buf_index++] = ' ';
|
@@ -209,7 +219,7 @@ char *phq_to_s(Query *self, char *field)
|
|
209
219
|
buffer[buf_index] = 0;
|
210
220
|
if (phq->slop != 0) {
|
211
221
|
sprintf(buffer + buf_index, "~%d", phq->slop);
|
212
|
-
buf_index += strlen(buffer + buf_index);
|
222
|
+
buf_index += (int)strlen(buffer + buf_index);
|
213
223
|
}
|
214
224
|
if (self->boost != 1.0) {
|
215
225
|
buffer[buf_index++] = '^';
|
@@ -218,10 +228,8 @@ char *phq_to_s(Query *self, char *field)
|
|
218
228
|
return buffer;
|
219
229
|
}
|
220
230
|
|
221
|
-
void phq_destroy(
|
231
|
+
void phq_destroy(Query *self)
|
222
232
|
{
|
223
|
-
Query *self = (Query *)p;
|
224
|
-
|
225
233
|
GET_PHQ;
|
226
234
|
int i;
|
227
235
|
if (self->destroy_all) {
|
@@ -233,7 +241,7 @@ void phq_destroy(void *p)
|
|
233
241
|
free(phq->positions);
|
234
242
|
free(phq);
|
235
243
|
|
236
|
-
|
244
|
+
q_destroy_i(self);
|
237
245
|
}
|
238
246
|
|
239
247
|
Query *phq_rewrite(Query *self, IndexReader *ir)
|
@@ -243,9 +251,9 @@ Query *phq_rewrite(Query *self, IndexReader *ir)
|
|
243
251
|
Term *term = phq->terms[0];
|
244
252
|
Query *tq = tq_create(term_clone(term));
|
245
253
|
tq->boost = self->boost;
|
246
|
-
|
247
|
-
return self->rewritten = tq;
|
254
|
+
return tq;
|
248
255
|
} else {
|
256
|
+
self->ref_cnt++;
|
249
257
|
return self;
|
250
258
|
}
|
251
259
|
}
|
@@ -273,22 +281,48 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
|
|
273
281
|
phq->t_cnt++;
|
274
282
|
}
|
275
283
|
|
284
|
+
static uint phq_hash(Query *self)
|
285
|
+
{
|
286
|
+
int i;
|
287
|
+
uint hash = 0;
|
288
|
+
PhraseQuery *phq = (PhraseQuery *)self->data;
|
289
|
+
for (i = 0; i < phq->t_cnt; i++) {
|
290
|
+
hash = (hash << 1) ^ (term_hash(phq->terms[i]) ^ phq->positions[i]);
|
291
|
+
}
|
292
|
+
return (hash ^ phq->slop);
|
293
|
+
}
|
294
|
+
|
295
|
+
static int phq_eq(Query *self, Query *o)
|
296
|
+
{
|
297
|
+
int i;
|
298
|
+
PhraseQuery *phq1 = (PhraseQuery *)self->data;
|
299
|
+
PhraseQuery *phq2 = (PhraseQuery *)o->data;
|
300
|
+
if (phq1->slop != phq2->slop) return false;
|
301
|
+
for (i = 0; i < phq1->t_cnt; i++) {
|
302
|
+
if (!term_eq(phq1->terms[i], phq2->terms[i]) ||
|
303
|
+
(phq1->positions[i] != phq2->positions[i])) return false;
|
304
|
+
}
|
305
|
+
return true;
|
306
|
+
}
|
307
|
+
|
276
308
|
Query *phq_create()
|
277
309
|
{
|
278
310
|
Query *self = q_create();
|
279
|
-
PhraseQuery *phq =
|
280
|
-
|
311
|
+
PhraseQuery *phq = ALLOC_AND_ZERO_N(PhraseQuery, 1);
|
312
|
+
|
281
313
|
phq->t_capa = PHQ_INIT_CAPA;
|
282
314
|
phq->terms = ALLOC_N(Term *, PHQ_INIT_CAPA);
|
283
315
|
phq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
|
284
316
|
self->data = phq;
|
285
317
|
|
286
|
-
self->
|
318
|
+
self->type = PHRASE_QUERY;
|
319
|
+
self->rewrite = &phq_rewrite;
|
287
320
|
self->extract_terms = &phq_extract_terms;
|
288
321
|
self->to_s = &phq_to_s;
|
289
|
-
self->
|
290
|
-
self->
|
291
|
-
self->
|
322
|
+
self->hash = &phq_hash;
|
323
|
+
self->eq = &phq_eq;
|
324
|
+
self->destroy_i = &phq_destroy;
|
325
|
+
self->create_weight_i = &phw_create;
|
292
326
|
return self;
|
293
327
|
}
|
294
328
|
|
@@ -376,9 +410,8 @@ bool pp_less_than(void *p1, void *p2)
|
|
376
410
|
}
|
377
411
|
}
|
378
412
|
|
379
|
-
void pp_destroy(
|
413
|
+
void pp_destroy(PhrasePosition *pp)
|
380
414
|
{
|
381
|
-
PhrasePosition *pp = (PhrasePosition *)p;
|
382
415
|
if (pp->tpe) pp->tpe->close(pp->tpe);
|
383
416
|
free(pp);
|
384
417
|
}
|
@@ -396,7 +429,7 @@ PhrasePosition *pp_create(TermDocEnum *tpe, int offset)
|
|
396
429
|
* PhraseScorer
|
397
430
|
***************************************************************************/
|
398
431
|
|
399
|
-
#define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
|
432
|
+
#define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
|
400
433
|
|
401
434
|
|
402
435
|
void phsc_init(PhraseScorer *phsc)
|
@@ -484,27 +517,28 @@ bool phsc_skip_to(Scorer *self, int doc_num)
|
|
484
517
|
return phsc_do_next(self);
|
485
518
|
}
|
486
519
|
|
487
|
-
Explanation *phsc_explain(Scorer *self, int doc_num)
|
520
|
+
static Explanation *phsc_explain(Scorer *self, int doc_num)
|
488
521
|
{
|
489
522
|
GET_PHSC;
|
523
|
+
float phrase_freq;
|
524
|
+
|
490
525
|
while (phsc_next(self) && self->doc < doc_num)
|
491
526
|
;
|
492
527
|
|
493
|
-
|
528
|
+
phrase_freq = (self->doc == doc_num) ? phsc->freq : (float)0.0;
|
494
529
|
return expl_create(sim_tf(self->similarity, phrase_freq),
|
495
530
|
strfmt("tf(phrase_freq=%f)", phrase_freq));
|
496
531
|
}
|
497
532
|
|
498
|
-
void phsc_destroy(
|
533
|
+
static void phsc_destroy(Scorer *self)
|
499
534
|
{
|
500
|
-
Scorer *self = (Scorer *)p;
|
501
535
|
GET_PHSC;
|
502
536
|
int i;
|
503
537
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
504
538
|
pp_destroy(phsc->phrase_pos[i]);
|
505
539
|
}
|
506
540
|
free(phsc->phrase_pos);
|
507
|
-
|
541
|
+
scorer_destroy_i(self);
|
508
542
|
}
|
509
543
|
|
510
544
|
Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
data/ext/q_prefix.c
CHANGED
@@ -11,8 +11,8 @@ char *prq_to_s(Query *self, char *field)
|
|
11
11
|
{
|
12
12
|
char *buffer, *bptr;
|
13
13
|
Term *term = (Term *)self->data;
|
14
|
-
|
15
|
-
|
14
|
+
size_t tlen = strlen(term->text);
|
15
|
+
size_t flen = strlen(term->field);
|
16
16
|
bptr = buffer = ALLOC_N(char, tlen + flen + 35);
|
17
17
|
|
18
18
|
if (strcmp(term->field, field) != 0) {
|
@@ -33,7 +33,7 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
33
33
|
Term *prefix = (Term *)self->data;
|
34
34
|
TermEnum *te = ir->terms_from(ir, prefix);
|
35
35
|
char *prefix_text = prefix->text;
|
36
|
-
|
36
|
+
size_t prefix_length = strlen(prefix_text);
|
37
37
|
char *prefix_field = prefix->field;
|
38
38
|
Query *tq;
|
39
39
|
Query *bq = bq_create(true);
|
@@ -45,34 +45,45 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
45
45
|
strncmp(tb->text, prefix_text, prefix_length) != 0) {
|
46
46
|
break;
|
47
47
|
}
|
48
|
-
tq = tq_create(term_create(tb->field, tb->text));
|
49
|
-
tq->boost = self->boost;
|
50
|
-
bq_add_query(bq, tq, BC_SHOULD);
|
48
|
+
tq = tq_create(term_create(tb->field, tb->text)); /* found a match */
|
49
|
+
tq->boost = self->boost; /* set the boost */
|
50
|
+
bq_add_query(bq, tq, BC_SHOULD); /* add to query */
|
51
51
|
} while (te->next(te));
|
52
52
|
XFINALLY
|
53
53
|
te->close(te);
|
54
54
|
XENDTRY
|
55
55
|
|
56
|
-
|
57
|
-
return self->rewritten = bq;
|
56
|
+
return bq;
|
58
57
|
}
|
59
58
|
|
60
|
-
void prq_destroy(
|
59
|
+
static void prq_destroy(Query *self)
|
61
60
|
{
|
62
|
-
Query *self = (Query *)p;
|
63
61
|
if (self->destroy_all) term_destroy((Term *)self->data);
|
64
|
-
|
62
|
+
q_destroy_i(self);
|
63
|
+
}
|
64
|
+
|
65
|
+
static uint prq_hash(Query *self)
|
66
|
+
{
|
67
|
+
return term_hash((Term *)self->data);
|
68
|
+
}
|
69
|
+
|
70
|
+
static int prq_eq(Query *self, Query *o)
|
71
|
+
{
|
72
|
+
return term_eq((Term *)self->data, (Term *)o->data);
|
65
73
|
}
|
66
74
|
|
67
75
|
Query *prefixq_create(Term *prefix)
|
68
76
|
{
|
69
77
|
Query *self = q_create();
|
70
78
|
self->data = prefix;
|
79
|
+
|
71
80
|
self->type = PREFIX_QUERY;
|
72
|
-
self->create_weight = NULL;
|
73
|
-
self->to_s = &prq_to_s;
|
74
81
|
self->rewrite = &prq_rewrite;
|
75
|
-
self->
|
82
|
+
self->to_s = &prq_to_s;
|
83
|
+
self->hash = &prq_hash;
|
84
|
+
self->eq = &prq_eq;
|
85
|
+
self->destroy_i = &prq_destroy;
|
86
|
+
self->create_weight_i = &q_create_weight_unsup;
|
76
87
|
|
77
88
|
return self;
|
78
89
|
}
|
data/ext/q_range.c
CHANGED
@@ -15,7 +15,7 @@ static char * const BOUND_ORDER_ERROR_MSG = "The lower bound must less than the
|
|
15
15
|
char *range_to_s(Range *range, char *field, float boost)
|
16
16
|
{
|
17
17
|
char *buffer, *b;
|
18
|
-
|
18
|
+
size_t flen, llen, ulen;
|
19
19
|
|
20
20
|
flen = strlen(range->field);
|
21
21
|
llen = range->lower_term ? strlen(range->lower_term) : 0;
|
@@ -71,6 +71,28 @@ void range_destroy(void *p)
|
|
71
71
|
free(range);
|
72
72
|
}
|
73
73
|
|
74
|
+
static inline uint range_hash(Range *self)
|
75
|
+
{
|
76
|
+
return self->include_lower | (self->include_upper << 1) |
|
77
|
+
((str_hash(self->field) ^
|
78
|
+
(self->lower_term ? str_hash(self->lower_term) : 0) ^
|
79
|
+
(self->upper_term ? str_hash(self->upper_term) : 0)) << 2);
|
80
|
+
}
|
81
|
+
|
82
|
+
static inline int str_eq(char *s1, char *s2)
|
83
|
+
{
|
84
|
+
return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
|
85
|
+
}
|
86
|
+
|
87
|
+
static inline int range_eq(Range *self, Range *o)
|
88
|
+
{
|
89
|
+
return (str_eq(self->field, o->field) &&
|
90
|
+
str_eq(self->lower_term, o->lower_term) &&
|
91
|
+
str_eq(self->upper_term, o->upper_term) &&
|
92
|
+
(self->include_lower == o->include_lower) &&
|
93
|
+
(self->include_upper == o->include_upper));
|
94
|
+
}
|
95
|
+
|
74
96
|
Range *range_create(const char *field, char *lower_term, char *upper_term,
|
75
97
|
bool include_lower, bool include_upper)
|
76
98
|
{
|
@@ -101,9 +123,8 @@ Range *range_create(const char *field, char *lower_term, char *upper_term,
|
|
101
123
|
*
|
102
124
|
***************************************************************************/
|
103
125
|
|
104
|
-
void rfilt_destroy(
|
126
|
+
void rfilt_destroy(Filter *self)
|
105
127
|
{
|
106
|
-
Filter *self = (Filter *)p;
|
107
128
|
range_destroy(self->data);
|
108
129
|
filt_destroy(self);
|
109
130
|
}
|
@@ -111,7 +132,10 @@ void rfilt_destroy(void *p)
|
|
111
132
|
char *rfilt_to_s(Filter *self)
|
112
133
|
{
|
113
134
|
Range *range = (Range *)self->data;
|
114
|
-
|
135
|
+
char *rstr = range_to_s(range, "", 1.0);
|
136
|
+
char *rfstr = epstrdup("RangeFilter< %s >", strlen(rstr), rstr);
|
137
|
+
free(rstr);
|
138
|
+
return rfstr;
|
115
139
|
}
|
116
140
|
|
117
141
|
BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
|
@@ -177,6 +201,16 @@ BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
|
|
177
201
|
return bv;
|
178
202
|
}
|
179
203
|
|
204
|
+
uint rfilt_hash(Filter *self)
|
205
|
+
{
|
206
|
+
return range_hash((Range *)self->data);
|
207
|
+
}
|
208
|
+
|
209
|
+
int rfilt_eq(Filter *self, Filter *o)
|
210
|
+
{
|
211
|
+
return range_eq((Range *)self->data, (Range *)o->data);
|
212
|
+
}
|
213
|
+
|
180
214
|
Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
181
215
|
bool include_lower, bool include_upper)
|
182
216
|
{
|
@@ -187,6 +221,8 @@ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
|
187
221
|
self = filt_create("RangeFilter");
|
188
222
|
self->data = range;
|
189
223
|
self->get_bv = &rfilt_get_bv;
|
224
|
+
self->hash = &rfilt_hash;
|
225
|
+
self->eq = &rfilt_eq;
|
190
226
|
self->to_s = &rfilt_to_s;
|
191
227
|
self->destroy = &rfilt_destroy;
|
192
228
|
return self;
|
@@ -204,11 +240,10 @@ char *rq_to_s(Query *self, char *field)
|
|
204
240
|
return range_to_s(range, field, self->boost);
|
205
241
|
}
|
206
242
|
|
207
|
-
void rq_destroy(
|
243
|
+
void rq_destroy(Query *self)
|
208
244
|
{
|
209
|
-
Query *self = (Query *)p;
|
210
245
|
range_destroy(self->data);
|
211
|
-
|
246
|
+
q_destroy_i(self);
|
212
247
|
}
|
213
248
|
|
214
249
|
Query *rq_rewrite(Query *self, IndexReader *ir)
|
@@ -216,8 +251,17 @@ Query *rq_rewrite(Query *self, IndexReader *ir)
|
|
216
251
|
Range *r = (Range *)self->data;
|
217
252
|
Filter *filter = rfilt_create(r->field, r->lower_term, r->upper_term,
|
218
253
|
r->include_lower, r->include_upper);
|
219
|
-
|
220
|
-
|
254
|
+
return csq_create(filter);
|
255
|
+
}
|
256
|
+
|
257
|
+
static uint rq_hash(Query *self)
|
258
|
+
{
|
259
|
+
return range_hash((Range *)self->data);
|
260
|
+
}
|
261
|
+
|
262
|
+
static int rq_eq(Query *self, Query *o)
|
263
|
+
{
|
264
|
+
return range_eq((Range *)self->data, (Range *)o->data);
|
221
265
|
}
|
222
266
|
|
223
267
|
Query *rq_create_less(const char *field, char *upper_term, bool include_upper)
|
@@ -233,17 +277,18 @@ Query *rq_create_more(const char *field, char *lower_term, bool include_lower)
|
|
233
277
|
Query *rq_create(const char *field, char *lower_term, char *upper_term,
|
234
278
|
bool include_lower, bool include_upper)
|
235
279
|
{
|
236
|
-
Query *self;
|
280
|
+
Query *self = q_create();
|
237
281
|
Range *range = range_create(field, lower_term, upper_term,
|
238
282
|
include_lower, include_upper);
|
239
283
|
|
240
|
-
self =
|
284
|
+
self->data = range;
|
241
285
|
|
242
286
|
self->type = RANGE_QUERY;
|
243
|
-
self->data = range;
|
244
|
-
self->create_weight = NULL;
|
245
287
|
self->rewrite = &rq_rewrite;
|
246
288
|
self->to_s = &rq_to_s;
|
247
|
-
self->
|
289
|
+
self->hash = &rq_hash;
|
290
|
+
self->eq = &rq_eq;
|
291
|
+
self->destroy_i = &rq_destroy;
|
292
|
+
self->create_weight_i = &q_create_weight_unsup;
|
248
293
|
return self;
|
249
294
|
}
|