ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_phrase.c
CHANGED
@@ -14,9 +14,13 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
14
14
|
Scorer *phsc;
|
15
15
|
PhraseQuery *phq = (PhraseQuery *)self->query->data;
|
16
16
|
int i;
|
17
|
-
|
17
|
+
TermDocEnum **tps;
|
18
|
+
|
19
|
+
if (phq->t_cnt == 0) {
|
20
|
+
return NULL; /* optimize zero-term case */
|
21
|
+
}
|
18
22
|
|
19
|
-
|
23
|
+
tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
|
20
24
|
|
21
25
|
for (i = 0; i < phq->t_cnt; i++) {
|
22
26
|
tps[i] = ir_term_positions_for(ir, phq->terms[i]);
|
@@ -47,6 +51,17 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
47
51
|
|
48
52
|
Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
49
53
|
{
|
54
|
+
Explanation *idf_expl1;
|
55
|
+
Explanation *idf_expl2;
|
56
|
+
Explanation *query_expl;
|
57
|
+
Explanation *qnorm_expl;
|
58
|
+
Explanation *field_expl;
|
59
|
+
Explanation *tf_expl;
|
60
|
+
Scorer *scorer;
|
61
|
+
uchar *field_norms;
|
62
|
+
float field_norm;
|
63
|
+
Explanation *field_norm_expl;
|
64
|
+
|
50
65
|
char *query_str = self->query->to_s(self->query, "");
|
51
66
|
PhraseQuery *phq = (PhraseQuery *)self->query->data;
|
52
67
|
int i;
|
@@ -57,25 +72,25 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
57
72
|
strfmt("weight(%s in %d), product of:", query_str, doc_num));
|
58
73
|
|
59
74
|
for (i = 0; i < phq->t_cnt; i++) {
|
60
|
-
len += strlen(phq->terms[i]->text) + 30;
|
75
|
+
len += (int)strlen(phq->terms[i]->text) + 30;
|
61
76
|
}
|
62
77
|
doc_freqs = ALLOC_N(char, len);
|
63
78
|
for (i = 0; i < phq->t_cnt; i++) {
|
64
79
|
Term *term = phq->terms[i];
|
65
80
|
sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
|
66
|
-
pos += strlen(doc_freqs + pos);
|
81
|
+
pos += (int)strlen(doc_freqs + pos);
|
67
82
|
}
|
68
83
|
pos -= 2; // remove ", " from the end
|
69
84
|
doc_freqs[pos] = 0;
|
70
85
|
|
71
|
-
|
86
|
+
idf_expl1 = expl_create(self->idf,
|
72
87
|
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
73
|
-
|
88
|
+
idf_expl2 = expl_create(self->idf,
|
74
89
|
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
75
90
|
free(doc_freqs);
|
76
91
|
|
77
|
-
|
78
|
-
|
92
|
+
/* explain query weight */
|
93
|
+
query_expl = expl_create(0.0,
|
79
94
|
strfmt("query_weight(%s), product of:", query_str));
|
80
95
|
|
81
96
|
if (self->query->boost != 1.0) {
|
@@ -83,36 +98,36 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
83
98
|
}
|
84
99
|
expl_add_detail(query_expl, idf_expl1);
|
85
100
|
|
86
|
-
|
101
|
+
qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
|
87
102
|
expl_add_detail(query_expl, qnorm_expl);
|
88
103
|
|
89
104
|
query_expl->value = self->query->boost * self->idf * self->qnorm;
|
90
105
|
|
91
106
|
expl_add_detail(expl, query_expl);
|
92
107
|
|
93
|
-
|
94
|
-
|
108
|
+
/* explain field weight */
|
109
|
+
field_expl = expl_create(0.0,
|
95
110
|
strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
|
96
111
|
free(query_str);
|
97
112
|
|
98
|
-
|
99
|
-
|
113
|
+
scorer = self->scorer(self, ir);
|
114
|
+
tf_expl = scorer->explain(scorer, doc_num);
|
100
115
|
scorer->destroy(scorer);
|
101
116
|
expl_add_detail(field_expl, tf_expl);
|
102
117
|
expl_add_detail(field_expl, idf_expl2);
|
103
118
|
|
104
|
-
|
105
|
-
|
119
|
+
field_norms = ir->get_norms(ir, phq->field);
|
120
|
+
field_norm = (field_norms != NULL)
|
106
121
|
? sim_decode_norm(self->similarity, field_norms[doc_num])
|
107
|
-
: 0.0;
|
108
|
-
|
122
|
+
: (float)0.0;
|
123
|
+
field_norm_expl = expl_create(field_norm,
|
109
124
|
strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
|
110
125
|
|
111
126
|
expl_add_detail(field_expl, field_norm_expl);
|
112
127
|
|
113
128
|
field_expl->value = tf_expl->value * self->idf * field_norm;
|
114
129
|
|
115
|
-
|
130
|
+
/* combine them */
|
116
131
|
if (query_expl->value == 1.0) {
|
117
132
|
expl_destoy(expl);
|
118
133
|
return field_expl;
|
@@ -130,20 +145,15 @@ char *phw_to_s(Weight *self)
|
|
130
145
|
|
131
146
|
Weight *phw_create(Query *query, Searcher *searcher)
|
132
147
|
{
|
148
|
+
Weight *self = w_create(query);
|
133
149
|
PhraseQuery *phq = (PhraseQuery *)query->data;
|
134
|
-
|
135
|
-
ZEROSET(self, Weight, 1);
|
136
|
-
self->get_query = &w_get_query;
|
137
|
-
self->get_value = &w_get_value;
|
138
|
-
self->normalize = &w_normalize;
|
150
|
+
|
139
151
|
self->scorer = &phw_scorer;
|
140
152
|
self->explain = &phw_explain;
|
141
153
|
self->to_s = &phw_to_s;
|
142
|
-
self->destroy = &free;
|
143
154
|
self->sum_of_squared_weights = &w_sum_of_squared_weights;
|
144
155
|
|
145
156
|
self->similarity = query->get_similarity(query, searcher);
|
146
|
-
self->query = query;
|
147
157
|
self->value = query->boost;
|
148
158
|
self->idf = sim_idf_phrase(self->similarity, phq->terms, phq->t_cnt, searcher);
|
149
159
|
|
@@ -158,12 +168,12 @@ Weight *phw_create(Query *query, Searcher *searcher)
|
|
158
168
|
|
159
169
|
#define GET_PHQ PhraseQuery *phq = (PhraseQuery *)self->data
|
160
170
|
|
161
|
-
void phq_extract_terms(Query *self,
|
171
|
+
void phq_extract_terms(Query *self, HashSet *terms)
|
162
172
|
{
|
163
173
|
GET_PHQ;
|
164
174
|
int i;
|
165
175
|
for (i = 0; i < phq->t_cnt; i++) {
|
166
|
-
|
176
|
+
hs_add(terms, term_clone(phq->terms[i]));
|
167
177
|
}
|
168
178
|
}
|
169
179
|
|
@@ -173,9 +183,9 @@ char *phq_to_s(Query *self, char *field)
|
|
173
183
|
int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
|
174
184
|
char *buffer;
|
175
185
|
if (!phq->t_cnt) return NULL;
|
176
|
-
len = strlen(phq->field) + 1;
|
186
|
+
len = (int)strlen(phq->field) + 1;
|
177
187
|
for (i = 0; i < phq->t_cnt; i++) {
|
178
|
-
len += strlen(phq->terms[i]->text) + 1;
|
188
|
+
len += (int)strlen(phq->terms[i]->text) + 1;
|
179
189
|
}
|
180
190
|
// add space for extra characters and boost and slop
|
181
191
|
len += 100 + 3 * phq->positions[phq->t_cnt - 1];
|
@@ -183,7 +193,7 @@ char *phq_to_s(Query *self, char *field)
|
|
183
193
|
buffer = ALLOC_N(char, len);
|
184
194
|
|
185
195
|
if (strcmp(field, phq->field) != 0) {
|
186
|
-
len = strlen(phq->field);
|
196
|
+
len = (int)strlen(phq->field);
|
187
197
|
memcpy(buffer, phq->field, len);
|
188
198
|
buffer[len] = ':';
|
189
199
|
buf_index += len + 1;
|
@@ -199,7 +209,7 @@ char *phq_to_s(Query *self, char *field)
|
|
199
209
|
}
|
200
210
|
last_pos = pos;
|
201
211
|
|
202
|
-
len = strlen(term->text);
|
212
|
+
len = (int)strlen(term->text);
|
203
213
|
memcpy(buffer + buf_index, term->text, len);
|
204
214
|
buf_index += len;
|
205
215
|
buffer[buf_index++] = ' ';
|
@@ -209,7 +219,7 @@ char *phq_to_s(Query *self, char *field)
|
|
209
219
|
buffer[buf_index] = 0;
|
210
220
|
if (phq->slop != 0) {
|
211
221
|
sprintf(buffer + buf_index, "~%d", phq->slop);
|
212
|
-
buf_index += strlen(buffer + buf_index);
|
222
|
+
buf_index += (int)strlen(buffer + buf_index);
|
213
223
|
}
|
214
224
|
if (self->boost != 1.0) {
|
215
225
|
buffer[buf_index++] = '^';
|
@@ -218,10 +228,8 @@ char *phq_to_s(Query *self, char *field)
|
|
218
228
|
return buffer;
|
219
229
|
}
|
220
230
|
|
221
|
-
void phq_destroy(
|
231
|
+
void phq_destroy(Query *self)
|
222
232
|
{
|
223
|
-
Query *self = (Query *)p;
|
224
|
-
|
225
233
|
GET_PHQ;
|
226
234
|
int i;
|
227
235
|
if (self->destroy_all) {
|
@@ -233,7 +241,7 @@ void phq_destroy(void *p)
|
|
233
241
|
free(phq->positions);
|
234
242
|
free(phq);
|
235
243
|
|
236
|
-
|
244
|
+
q_destroy_i(self);
|
237
245
|
}
|
238
246
|
|
239
247
|
Query *phq_rewrite(Query *self, IndexReader *ir)
|
@@ -243,9 +251,9 @@ Query *phq_rewrite(Query *self, IndexReader *ir)
|
|
243
251
|
Term *term = phq->terms[0];
|
244
252
|
Query *tq = tq_create(term_clone(term));
|
245
253
|
tq->boost = self->boost;
|
246
|
-
|
247
|
-
return self->rewritten = tq;
|
254
|
+
return tq;
|
248
255
|
} else {
|
256
|
+
self->ref_cnt++;
|
249
257
|
return self;
|
250
258
|
}
|
251
259
|
}
|
@@ -273,22 +281,48 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
|
|
273
281
|
phq->t_cnt++;
|
274
282
|
}
|
275
283
|
|
284
|
+
static uint phq_hash(Query *self)
|
285
|
+
{
|
286
|
+
int i;
|
287
|
+
uint hash = 0;
|
288
|
+
PhraseQuery *phq = (PhraseQuery *)self->data;
|
289
|
+
for (i = 0; i < phq->t_cnt; i++) {
|
290
|
+
hash = (hash << 1) ^ (term_hash(phq->terms[i]) ^ phq->positions[i]);
|
291
|
+
}
|
292
|
+
return (hash ^ phq->slop);
|
293
|
+
}
|
294
|
+
|
295
|
+
static int phq_eq(Query *self, Query *o)
|
296
|
+
{
|
297
|
+
int i;
|
298
|
+
PhraseQuery *phq1 = (PhraseQuery *)self->data;
|
299
|
+
PhraseQuery *phq2 = (PhraseQuery *)o->data;
|
300
|
+
if (phq1->slop != phq2->slop) return false;
|
301
|
+
for (i = 0; i < phq1->t_cnt; i++) {
|
302
|
+
if (!term_eq(phq1->terms[i], phq2->terms[i]) ||
|
303
|
+
(phq1->positions[i] != phq2->positions[i])) return false;
|
304
|
+
}
|
305
|
+
return true;
|
306
|
+
}
|
307
|
+
|
276
308
|
Query *phq_create()
|
277
309
|
{
|
278
310
|
Query *self = q_create();
|
279
|
-
PhraseQuery *phq =
|
280
|
-
|
311
|
+
PhraseQuery *phq = ALLOC_AND_ZERO_N(PhraseQuery, 1);
|
312
|
+
|
281
313
|
phq->t_capa = PHQ_INIT_CAPA;
|
282
314
|
phq->terms = ALLOC_N(Term *, PHQ_INIT_CAPA);
|
283
315
|
phq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
|
284
316
|
self->data = phq;
|
285
317
|
|
286
|
-
self->
|
318
|
+
self->type = PHRASE_QUERY;
|
319
|
+
self->rewrite = &phq_rewrite;
|
287
320
|
self->extract_terms = &phq_extract_terms;
|
288
321
|
self->to_s = &phq_to_s;
|
289
|
-
self->
|
290
|
-
self->
|
291
|
-
self->
|
322
|
+
self->hash = &phq_hash;
|
323
|
+
self->eq = &phq_eq;
|
324
|
+
self->destroy_i = &phq_destroy;
|
325
|
+
self->create_weight_i = &phw_create;
|
292
326
|
return self;
|
293
327
|
}
|
294
328
|
|
@@ -376,9 +410,8 @@ bool pp_less_than(void *p1, void *p2)
|
|
376
410
|
}
|
377
411
|
}
|
378
412
|
|
379
|
-
void pp_destroy(
|
413
|
+
void pp_destroy(PhrasePosition *pp)
|
380
414
|
{
|
381
|
-
PhrasePosition *pp = (PhrasePosition *)p;
|
382
415
|
if (pp->tpe) pp->tpe->close(pp->tpe);
|
383
416
|
free(pp);
|
384
417
|
}
|
@@ -396,7 +429,7 @@ PhrasePosition *pp_create(TermDocEnum *tpe, int offset)
|
|
396
429
|
* PhraseScorer
|
397
430
|
***************************************************************************/
|
398
431
|
|
399
|
-
#define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
|
432
|
+
#define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
|
400
433
|
|
401
434
|
|
402
435
|
void phsc_init(PhraseScorer *phsc)
|
@@ -484,27 +517,28 @@ bool phsc_skip_to(Scorer *self, int doc_num)
|
|
484
517
|
return phsc_do_next(self);
|
485
518
|
}
|
486
519
|
|
487
|
-
Explanation *phsc_explain(Scorer *self, int doc_num)
|
520
|
+
static Explanation *phsc_explain(Scorer *self, int doc_num)
|
488
521
|
{
|
489
522
|
GET_PHSC;
|
523
|
+
float phrase_freq;
|
524
|
+
|
490
525
|
while (phsc_next(self) && self->doc < doc_num)
|
491
526
|
;
|
492
527
|
|
493
|
-
|
528
|
+
phrase_freq = (self->doc == doc_num) ? phsc->freq : (float)0.0;
|
494
529
|
return expl_create(sim_tf(self->similarity, phrase_freq),
|
495
530
|
strfmt("tf(phrase_freq=%f)", phrase_freq));
|
496
531
|
}
|
497
532
|
|
498
|
-
void phsc_destroy(
|
533
|
+
static void phsc_destroy(Scorer *self)
|
499
534
|
{
|
500
|
-
Scorer *self = (Scorer *)p;
|
501
535
|
GET_PHSC;
|
502
536
|
int i;
|
503
537
|
for (i = phsc->pp_cnt - 1; i >= 0; i--) {
|
504
538
|
pp_destroy(phsc->phrase_pos[i]);
|
505
539
|
}
|
506
540
|
free(phsc->phrase_pos);
|
507
|
-
|
541
|
+
scorer_destroy_i(self);
|
508
542
|
}
|
509
543
|
|
510
544
|
Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
data/ext/q_prefix.c
CHANGED
@@ -11,8 +11,8 @@ char *prq_to_s(Query *self, char *field)
|
|
11
11
|
{
|
12
12
|
char *buffer, *bptr;
|
13
13
|
Term *term = (Term *)self->data;
|
14
|
-
|
15
|
-
|
14
|
+
size_t tlen = strlen(term->text);
|
15
|
+
size_t flen = strlen(term->field);
|
16
16
|
bptr = buffer = ALLOC_N(char, tlen + flen + 35);
|
17
17
|
|
18
18
|
if (strcmp(term->field, field) != 0) {
|
@@ -33,7 +33,7 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
33
33
|
Term *prefix = (Term *)self->data;
|
34
34
|
TermEnum *te = ir->terms_from(ir, prefix);
|
35
35
|
char *prefix_text = prefix->text;
|
36
|
-
|
36
|
+
size_t prefix_length = strlen(prefix_text);
|
37
37
|
char *prefix_field = prefix->field;
|
38
38
|
Query *tq;
|
39
39
|
Query *bq = bq_create(true);
|
@@ -45,34 +45,45 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
45
45
|
strncmp(tb->text, prefix_text, prefix_length) != 0) {
|
46
46
|
break;
|
47
47
|
}
|
48
|
-
tq = tq_create(term_create(tb->field, tb->text));
|
49
|
-
tq->boost = self->boost;
|
50
|
-
bq_add_query(bq, tq, BC_SHOULD);
|
48
|
+
tq = tq_create(term_create(tb->field, tb->text)); /* found a match */
|
49
|
+
tq->boost = self->boost; /* set the boost */
|
50
|
+
bq_add_query(bq, tq, BC_SHOULD); /* add to query */
|
51
51
|
} while (te->next(te));
|
52
52
|
XFINALLY
|
53
53
|
te->close(te);
|
54
54
|
XENDTRY
|
55
55
|
|
56
|
-
|
57
|
-
return self->rewritten = bq;
|
56
|
+
return bq;
|
58
57
|
}
|
59
58
|
|
60
|
-
void prq_destroy(
|
59
|
+
static void prq_destroy(Query *self)
|
61
60
|
{
|
62
|
-
Query *self = (Query *)p;
|
63
61
|
if (self->destroy_all) term_destroy((Term *)self->data);
|
64
|
-
|
62
|
+
q_destroy_i(self);
|
63
|
+
}
|
64
|
+
|
65
|
+
static uint prq_hash(Query *self)
|
66
|
+
{
|
67
|
+
return term_hash((Term *)self->data);
|
68
|
+
}
|
69
|
+
|
70
|
+
static int prq_eq(Query *self, Query *o)
|
71
|
+
{
|
72
|
+
return term_eq((Term *)self->data, (Term *)o->data);
|
65
73
|
}
|
66
74
|
|
67
75
|
Query *prefixq_create(Term *prefix)
|
68
76
|
{
|
69
77
|
Query *self = q_create();
|
70
78
|
self->data = prefix;
|
79
|
+
|
71
80
|
self->type = PREFIX_QUERY;
|
72
|
-
self->create_weight = NULL;
|
73
|
-
self->to_s = &prq_to_s;
|
74
81
|
self->rewrite = &prq_rewrite;
|
75
|
-
self->
|
82
|
+
self->to_s = &prq_to_s;
|
83
|
+
self->hash = &prq_hash;
|
84
|
+
self->eq = &prq_eq;
|
85
|
+
self->destroy_i = &prq_destroy;
|
86
|
+
self->create_weight_i = &q_create_weight_unsup;
|
76
87
|
|
77
88
|
return self;
|
78
89
|
}
|
data/ext/q_range.c
CHANGED
@@ -15,7 +15,7 @@ static char * const BOUND_ORDER_ERROR_MSG = "The lower bound must less than the
|
|
15
15
|
char *range_to_s(Range *range, char *field, float boost)
|
16
16
|
{
|
17
17
|
char *buffer, *b;
|
18
|
-
|
18
|
+
size_t flen, llen, ulen;
|
19
19
|
|
20
20
|
flen = strlen(range->field);
|
21
21
|
llen = range->lower_term ? strlen(range->lower_term) : 0;
|
@@ -71,6 +71,28 @@ void range_destroy(void *p)
|
|
71
71
|
free(range);
|
72
72
|
}
|
73
73
|
|
74
|
+
static inline uint range_hash(Range *self)
|
75
|
+
{
|
76
|
+
return self->include_lower | (self->include_upper << 1) |
|
77
|
+
((str_hash(self->field) ^
|
78
|
+
(self->lower_term ? str_hash(self->lower_term) : 0) ^
|
79
|
+
(self->upper_term ? str_hash(self->upper_term) : 0)) << 2);
|
80
|
+
}
|
81
|
+
|
82
|
+
static inline int str_eq(char *s1, char *s2)
|
83
|
+
{
|
84
|
+
return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
|
85
|
+
}
|
86
|
+
|
87
|
+
static inline int range_eq(Range *self, Range *o)
|
88
|
+
{
|
89
|
+
return (str_eq(self->field, o->field) &&
|
90
|
+
str_eq(self->lower_term, o->lower_term) &&
|
91
|
+
str_eq(self->upper_term, o->upper_term) &&
|
92
|
+
(self->include_lower == o->include_lower) &&
|
93
|
+
(self->include_upper == o->include_upper));
|
94
|
+
}
|
95
|
+
|
74
96
|
Range *range_create(const char *field, char *lower_term, char *upper_term,
|
75
97
|
bool include_lower, bool include_upper)
|
76
98
|
{
|
@@ -101,9 +123,8 @@ Range *range_create(const char *field, char *lower_term, char *upper_term,
|
|
101
123
|
*
|
102
124
|
***************************************************************************/
|
103
125
|
|
104
|
-
void rfilt_destroy(
|
126
|
+
void rfilt_destroy(Filter *self)
|
105
127
|
{
|
106
|
-
Filter *self = (Filter *)p;
|
107
128
|
range_destroy(self->data);
|
108
129
|
filt_destroy(self);
|
109
130
|
}
|
@@ -111,7 +132,10 @@ void rfilt_destroy(void *p)
|
|
111
132
|
char *rfilt_to_s(Filter *self)
|
112
133
|
{
|
113
134
|
Range *range = (Range *)self->data;
|
114
|
-
|
135
|
+
char *rstr = range_to_s(range, "", 1.0);
|
136
|
+
char *rfstr = epstrdup("RangeFilter< %s >", strlen(rstr), rstr);
|
137
|
+
free(rstr);
|
138
|
+
return rfstr;
|
115
139
|
}
|
116
140
|
|
117
141
|
BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
|
@@ -177,6 +201,16 @@ BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
|
|
177
201
|
return bv;
|
178
202
|
}
|
179
203
|
|
204
|
+
uint rfilt_hash(Filter *self)
|
205
|
+
{
|
206
|
+
return range_hash((Range *)self->data);
|
207
|
+
}
|
208
|
+
|
209
|
+
int rfilt_eq(Filter *self, Filter *o)
|
210
|
+
{
|
211
|
+
return range_eq((Range *)self->data, (Range *)o->data);
|
212
|
+
}
|
213
|
+
|
180
214
|
Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
181
215
|
bool include_lower, bool include_upper)
|
182
216
|
{
|
@@ -187,6 +221,8 @@ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
|
187
221
|
self = filt_create("RangeFilter");
|
188
222
|
self->data = range;
|
189
223
|
self->get_bv = &rfilt_get_bv;
|
224
|
+
self->hash = &rfilt_hash;
|
225
|
+
self->eq = &rfilt_eq;
|
190
226
|
self->to_s = &rfilt_to_s;
|
191
227
|
self->destroy = &rfilt_destroy;
|
192
228
|
return self;
|
@@ -204,11 +240,10 @@ char *rq_to_s(Query *self, char *field)
|
|
204
240
|
return range_to_s(range, field, self->boost);
|
205
241
|
}
|
206
242
|
|
207
|
-
void rq_destroy(
|
243
|
+
void rq_destroy(Query *self)
|
208
244
|
{
|
209
|
-
Query *self = (Query *)p;
|
210
245
|
range_destroy(self->data);
|
211
|
-
|
246
|
+
q_destroy_i(self);
|
212
247
|
}
|
213
248
|
|
214
249
|
Query *rq_rewrite(Query *self, IndexReader *ir)
|
@@ -216,8 +251,17 @@ Query *rq_rewrite(Query *self, IndexReader *ir)
|
|
216
251
|
Range *r = (Range *)self->data;
|
217
252
|
Filter *filter = rfilt_create(r->field, r->lower_term, r->upper_term,
|
218
253
|
r->include_lower, r->include_upper);
|
219
|
-
|
220
|
-
|
254
|
+
return csq_create(filter);
|
255
|
+
}
|
256
|
+
|
257
|
+
static uint rq_hash(Query *self)
|
258
|
+
{
|
259
|
+
return range_hash((Range *)self->data);
|
260
|
+
}
|
261
|
+
|
262
|
+
static int rq_eq(Query *self, Query *o)
|
263
|
+
{
|
264
|
+
return range_eq((Range *)self->data, (Range *)o->data);
|
221
265
|
}
|
222
266
|
|
223
267
|
Query *rq_create_less(const char *field, char *upper_term, bool include_upper)
|
@@ -233,17 +277,18 @@ Query *rq_create_more(const char *field, char *lower_term, bool include_lower)
|
|
233
277
|
Query *rq_create(const char *field, char *lower_term, char *upper_term,
|
234
278
|
bool include_lower, bool include_upper)
|
235
279
|
{
|
236
|
-
Query *self;
|
280
|
+
Query *self = q_create();
|
237
281
|
Range *range = range_create(field, lower_term, upper_term,
|
238
282
|
include_lower, include_upper);
|
239
283
|
|
240
|
-
self =
|
284
|
+
self->data = range;
|
241
285
|
|
242
286
|
self->type = RANGE_QUERY;
|
243
|
-
self->data = range;
|
244
|
-
self->create_weight = NULL;
|
245
287
|
self->rewrite = &rq_rewrite;
|
246
288
|
self->to_s = &rq_to_s;
|
247
|
-
self->
|
289
|
+
self->hash = &rq_hash;
|
290
|
+
self->eq = &rq_eq;
|
291
|
+
self->destroy_i = &rq_destroy;
|
292
|
+
self->create_weight_i = &q_create_weight_unsup;
|
248
293
|
return self;
|
249
294
|
}
|