ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_phrase.c CHANGED
@@ -14,9 +14,13 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
14
14
  Scorer *phsc;
15
15
  PhraseQuery *phq = (PhraseQuery *)self->query->data;
16
16
  int i;
17
- if (phq->t_cnt == 0) return NULL; // optimize zero-term case
17
+ TermDocEnum **tps;
18
+
19
+ if (phq->t_cnt == 0) {
20
+ return NULL; /* optimize zero-term case */
21
+ }
18
22
 
19
- TermDocEnum **tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
23
+ tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
20
24
 
21
25
  for (i = 0; i < phq->t_cnt; i++) {
22
26
  tps[i] = ir_term_positions_for(ir, phq->terms[i]);
@@ -47,6 +51,17 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
47
51
 
48
52
  Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
49
53
  {
54
+ Explanation *idf_expl1;
55
+ Explanation *idf_expl2;
56
+ Explanation *query_expl;
57
+ Explanation *qnorm_expl;
58
+ Explanation *field_expl;
59
+ Explanation *tf_expl;
60
+ Scorer *scorer;
61
+ uchar *field_norms;
62
+ float field_norm;
63
+ Explanation *field_norm_expl;
64
+
50
65
  char *query_str = self->query->to_s(self->query, "");
51
66
  PhraseQuery *phq = (PhraseQuery *)self->query->data;
52
67
  int i;
@@ -57,25 +72,25 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
57
72
  strfmt("weight(%s in %d), product of:", query_str, doc_num));
58
73
 
59
74
  for (i = 0; i < phq->t_cnt; i++) {
60
- len += strlen(phq->terms[i]->text) + 30;
75
+ len += (int)strlen(phq->terms[i]->text) + 30;
61
76
  }
62
77
  doc_freqs = ALLOC_N(char, len);
63
78
  for (i = 0; i < phq->t_cnt; i++) {
64
79
  Term *term = phq->terms[i];
65
80
  sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
66
- pos += strlen(doc_freqs + pos);
81
+ pos += (int)strlen(doc_freqs + pos);
67
82
  }
68
83
  pos -= 2; // remove ", " from the end
69
84
  doc_freqs[pos] = 0;
70
85
 
71
- Explanation *idf_expl1 = expl_create(self->idf,
86
+ idf_expl1 = expl_create(self->idf,
72
87
  strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
73
- Explanation *idf_expl2 = expl_create(self->idf,
88
+ idf_expl2 = expl_create(self->idf,
74
89
  strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
75
90
  free(doc_freqs);
76
91
 
77
- // explain query weight
78
- Explanation *query_expl = expl_create(0.0,
92
+ /* explain query weight */
93
+ query_expl = expl_create(0.0,
79
94
  strfmt("query_weight(%s), product of:", query_str));
80
95
 
81
96
  if (self->query->boost != 1.0) {
@@ -83,36 +98,36 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
83
98
  }
84
99
  expl_add_detail(query_expl, idf_expl1);
85
100
 
86
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
101
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
87
102
  expl_add_detail(query_expl, qnorm_expl);
88
103
 
89
104
  query_expl->value = self->query->boost * self->idf * self->qnorm;
90
105
 
91
106
  expl_add_detail(expl, query_expl);
92
107
 
93
- // explain field weight
94
- Explanation *field_expl = expl_create(0.0,
108
+ /* explain field weight */
109
+ field_expl = expl_create(0.0,
95
110
  strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
96
111
  free(query_str);
97
112
 
98
- Scorer *scorer = self->scorer(self, ir);
99
- Explanation *tf_expl = scorer->explain(scorer, doc_num);
113
+ scorer = self->scorer(self, ir);
114
+ tf_expl = scorer->explain(scorer, doc_num);
100
115
  scorer->destroy(scorer);
101
116
  expl_add_detail(field_expl, tf_expl);
102
117
  expl_add_detail(field_expl, idf_expl2);
103
118
 
104
- uchar *field_norms = ir->get_norms(ir, phq->field);
105
- float field_norm = (field_norms != NULL)
119
+ field_norms = ir->get_norms(ir, phq->field);
120
+ field_norm = (field_norms != NULL)
106
121
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
107
- : 0.0;
108
- Explanation *field_norm_expl = expl_create(field_norm,
122
+ : (float)0.0;
123
+ field_norm_expl = expl_create(field_norm,
109
124
  strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
110
125
 
111
126
  expl_add_detail(field_expl, field_norm_expl);
112
127
 
113
128
  field_expl->value = tf_expl->value * self->idf * field_norm;
114
129
 
115
- // combine them
130
+ /* combine them */
116
131
  if (query_expl->value == 1.0) {
117
132
  expl_destoy(expl);
118
133
  return field_expl;
@@ -130,20 +145,15 @@ char *phw_to_s(Weight *self)
130
145
 
131
146
  Weight *phw_create(Query *query, Searcher *searcher)
132
147
  {
148
+ Weight *self = w_create(query);
133
149
  PhraseQuery *phq = (PhraseQuery *)query->data;
134
- Weight *self = ALLOC(Weight);
135
- ZEROSET(self, Weight, 1);
136
- self->get_query = &w_get_query;
137
- self->get_value = &w_get_value;
138
- self->normalize = &w_normalize;
150
+
139
151
  self->scorer = &phw_scorer;
140
152
  self->explain = &phw_explain;
141
153
  self->to_s = &phw_to_s;
142
- self->destroy = &free;
143
154
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
144
155
 
145
156
  self->similarity = query->get_similarity(query, searcher);
146
- self->query = query;
147
157
  self->value = query->boost;
148
158
  self->idf = sim_idf_phrase(self->similarity, phq->terms, phq->t_cnt, searcher);
149
159
 
@@ -158,12 +168,12 @@ Weight *phw_create(Query *query, Searcher *searcher)
158
168
 
159
169
  #define GET_PHQ PhraseQuery *phq = (PhraseQuery *)self->data
160
170
 
161
- void phq_extract_terms(Query *self, Array *terms)
171
+ void phq_extract_terms(Query *self, HashSet *terms)
162
172
  {
163
173
  GET_PHQ;
164
174
  int i;
165
175
  for (i = 0; i < phq->t_cnt; i++) {
166
- ary_append(terms, phq->terms[i]);
176
+ hs_add(terms, term_clone(phq->terms[i]));
167
177
  }
168
178
  }
169
179
 
@@ -173,9 +183,9 @@ char *phq_to_s(Query *self, char *field)
173
183
  int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
174
184
  char *buffer;
175
185
  if (!phq->t_cnt) return NULL;
176
- len = strlen(phq->field) + 1;
186
+ len = (int)strlen(phq->field) + 1;
177
187
  for (i = 0; i < phq->t_cnt; i++) {
178
- len += strlen(phq->terms[i]->text) + 1;
188
+ len += (int)strlen(phq->terms[i]->text) + 1;
179
189
  }
180
190
  // add space for extra characters and boost and slop
181
191
  len += 100 + 3 * phq->positions[phq->t_cnt - 1];
@@ -183,7 +193,7 @@ char *phq_to_s(Query *self, char *field)
183
193
  buffer = ALLOC_N(char, len);
184
194
 
185
195
  if (strcmp(field, phq->field) != 0) {
186
- len = strlen(phq->field);
196
+ len = (int)strlen(phq->field);
187
197
  memcpy(buffer, phq->field, len);
188
198
  buffer[len] = ':';
189
199
  buf_index += len + 1;
@@ -199,7 +209,7 @@ char *phq_to_s(Query *self, char *field)
199
209
  }
200
210
  last_pos = pos;
201
211
 
202
- len = strlen(term->text);
212
+ len = (int)strlen(term->text);
203
213
  memcpy(buffer + buf_index, term->text, len);
204
214
  buf_index += len;
205
215
  buffer[buf_index++] = ' ';
@@ -209,7 +219,7 @@ char *phq_to_s(Query *self, char *field)
209
219
  buffer[buf_index] = 0;
210
220
  if (phq->slop != 0) {
211
221
  sprintf(buffer + buf_index, "~%d", phq->slop);
212
- buf_index += strlen(buffer + buf_index);
222
+ buf_index += (int)strlen(buffer + buf_index);
213
223
  }
214
224
  if (self->boost != 1.0) {
215
225
  buffer[buf_index++] = '^';
@@ -218,10 +228,8 @@ char *phq_to_s(Query *self, char *field)
218
228
  return buffer;
219
229
  }
220
230
 
221
- void phq_destroy(void *p)
231
+ void phq_destroy(Query *self)
222
232
  {
223
- Query *self = (Query *)p;
224
-
225
233
  GET_PHQ;
226
234
  int i;
227
235
  if (self->destroy_all) {
@@ -233,7 +241,7 @@ void phq_destroy(void *p)
233
241
  free(phq->positions);
234
242
  free(phq);
235
243
 
236
- q_destroy(self);
244
+ q_destroy_i(self);
237
245
  }
238
246
 
239
247
  Query *phq_rewrite(Query *self, IndexReader *ir)
@@ -243,9 +251,9 @@ Query *phq_rewrite(Query *self, IndexReader *ir)
243
251
  Term *term = phq->terms[0];
244
252
  Query *tq = tq_create(term_clone(term));
245
253
  tq->boost = self->boost;
246
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
247
- return self->rewritten = tq;
254
+ return tq;
248
255
  } else {
256
+ self->ref_cnt++;
249
257
  return self;
250
258
  }
251
259
  }
@@ -273,22 +281,48 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
273
281
  phq->t_cnt++;
274
282
  }
275
283
 
284
+ static uint phq_hash(Query *self)
285
+ {
286
+ int i;
287
+ uint hash = 0;
288
+ PhraseQuery *phq = (PhraseQuery *)self->data;
289
+ for (i = 0; i < phq->t_cnt; i++) {
290
+ hash = (hash << 1) ^ (term_hash(phq->terms[i]) ^ phq->positions[i]);
291
+ }
292
+ return (hash ^ phq->slop);
293
+ }
294
+
295
+ static int phq_eq(Query *self, Query *o)
296
+ {
297
+ int i;
298
+ PhraseQuery *phq1 = (PhraseQuery *)self->data;
299
+ PhraseQuery *phq2 = (PhraseQuery *)o->data;
300
+ if (phq1->slop != phq2->slop) return false;
301
+ for (i = 0; i < phq1->t_cnt; i++) {
302
+ if (!term_eq(phq1->terms[i], phq2->terms[i]) ||
303
+ (phq1->positions[i] != phq2->positions[i])) return false;
304
+ }
305
+ return true;
306
+ }
307
+
276
308
  Query *phq_create()
277
309
  {
278
310
  Query *self = q_create();
279
- PhraseQuery *phq = ALLOC(PhraseQuery);
280
- ZEROSET(phq, PhraseQuery, 1);
311
+ PhraseQuery *phq = ALLOC_AND_ZERO_N(PhraseQuery, 1);
312
+
281
313
  phq->t_capa = PHQ_INIT_CAPA;
282
314
  phq->terms = ALLOC_N(Term *, PHQ_INIT_CAPA);
283
315
  phq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
284
316
  self->data = phq;
285
317
 
286
- self->create_weight = &phw_create;
318
+ self->type = PHRASE_QUERY;
319
+ self->rewrite = &phq_rewrite;
287
320
  self->extract_terms = &phq_extract_terms;
288
321
  self->to_s = &phq_to_s;
289
- self->destroy = &phq_destroy;
290
- self->rewrite = &phq_rewrite;
291
- self->type = PHRASE_QUERY;
322
+ self->hash = &phq_hash;
323
+ self->eq = &phq_eq;
324
+ self->destroy_i = &phq_destroy;
325
+ self->create_weight_i = &phw_create;
292
326
  return self;
293
327
  }
294
328
 
@@ -376,9 +410,8 @@ bool pp_less_than(void *p1, void *p2)
376
410
  }
377
411
  }
378
412
 
379
- void pp_destroy(void *p)
413
+ void pp_destroy(PhrasePosition *pp)
380
414
  {
381
- PhrasePosition *pp = (PhrasePosition *)p;
382
415
  if (pp->tpe) pp->tpe->close(pp->tpe);
383
416
  free(pp);
384
417
  }
@@ -396,7 +429,7 @@ PhrasePosition *pp_create(TermDocEnum *tpe, int offset)
396
429
  * PhraseScorer
397
430
  ***************************************************************************/
398
431
 
399
- #define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data;
432
+ #define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
400
433
 
401
434
 
402
435
  void phsc_init(PhraseScorer *phsc)
@@ -484,27 +517,28 @@ bool phsc_skip_to(Scorer *self, int doc_num)
484
517
  return phsc_do_next(self);
485
518
  }
486
519
 
487
- Explanation *phsc_explain(Scorer *self, int doc_num)
520
+ static Explanation *phsc_explain(Scorer *self, int doc_num)
488
521
  {
489
522
  GET_PHSC;
523
+ float phrase_freq;
524
+
490
525
  while (phsc_next(self) && self->doc < doc_num)
491
526
  ;
492
527
 
493
- float phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0;
528
+ phrase_freq = (self->doc == doc_num) ? phsc->freq : (float)0.0;
494
529
  return expl_create(sim_tf(self->similarity, phrase_freq),
495
530
  strfmt("tf(phrase_freq=%f)", phrase_freq));
496
531
  }
497
532
 
498
- void phsc_destroy(void *p)
533
+ static void phsc_destroy(Scorer *self)
499
534
  {
500
- Scorer *self = (Scorer *)p;
501
535
  GET_PHSC;
502
536
  int i;
503
537
  for (i = phsc->pp_cnt - 1; i >= 0; i--) {
504
538
  pp_destroy(phsc->phrase_pos[i]);
505
539
  }
506
540
  free(phsc->phrase_pos);
507
- scorer_destroy(self);
541
+ scorer_destroy_i(self);
508
542
  }
509
543
 
510
544
  Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
data/ext/q_prefix.c CHANGED
@@ -11,8 +11,8 @@ char *prq_to_s(Query *self, char *field)
11
11
  {
12
12
  char *buffer, *bptr;
13
13
  Term *term = (Term *)self->data;
14
- int tlen = strlen(term->text);
15
- int flen = strlen(term->field);
14
+ size_t tlen = strlen(term->text);
15
+ size_t flen = strlen(term->field);
16
16
  bptr = buffer = ALLOC_N(char, tlen + flen + 35);
17
17
 
18
18
  if (strcmp(term->field, field) != 0) {
@@ -33,7 +33,7 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
33
33
  Term *prefix = (Term *)self->data;
34
34
  TermEnum *te = ir->terms_from(ir, prefix);
35
35
  char *prefix_text = prefix->text;
36
- int prefix_length = strlen(prefix_text);
36
+ size_t prefix_length = strlen(prefix_text);
37
37
  char *prefix_field = prefix->field;
38
38
  Query *tq;
39
39
  Query *bq = bq_create(true);
@@ -45,34 +45,45 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
45
45
  strncmp(tb->text, prefix_text, prefix_length) != 0) {
46
46
  break;
47
47
  }
48
- tq = tq_create(term_create(tb->field, tb->text)); // found a match
49
- tq->boost = self->boost; // set the boost
50
- bq_add_query(bq, tq, BC_SHOULD); // add to query
48
+ tq = tq_create(term_create(tb->field, tb->text)); /* found a match */
49
+ tq->boost = self->boost; /* set the boost */
50
+ bq_add_query(bq, tq, BC_SHOULD); /* add to query */
51
51
  } while (te->next(te));
52
52
  XFINALLY
53
53
  te->close(te);
54
54
  XENDTRY
55
55
 
56
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
57
- return self->rewritten = bq;
56
+ return bq;
58
57
  }
59
58
 
60
- void prq_destroy(void *p)
59
+ static void prq_destroy(Query *self)
61
60
  {
62
- Query *self = (Query *)p;
63
61
  if (self->destroy_all) term_destroy((Term *)self->data);
64
- q_destroy(self);
62
+ q_destroy_i(self);
63
+ }
64
+
65
+ static uint prq_hash(Query *self)
66
+ {
67
+ return term_hash((Term *)self->data);
68
+ }
69
+
70
+ static int prq_eq(Query *self, Query *o)
71
+ {
72
+ return term_eq((Term *)self->data, (Term *)o->data);
65
73
  }
66
74
 
67
75
  Query *prefixq_create(Term *prefix)
68
76
  {
69
77
  Query *self = q_create();
70
78
  self->data = prefix;
79
+
71
80
  self->type = PREFIX_QUERY;
72
- self->create_weight = NULL;
73
- self->to_s = &prq_to_s;
74
81
  self->rewrite = &prq_rewrite;
75
- self->destroy = &prq_destroy;
82
+ self->to_s = &prq_to_s;
83
+ self->hash = &prq_hash;
84
+ self->eq = &prq_eq;
85
+ self->destroy_i = &prq_destroy;
86
+ self->create_weight_i = &q_create_weight_unsup;
76
87
 
77
88
  return self;
78
89
  }
data/ext/q_range.c CHANGED
@@ -15,7 +15,7 @@ static char * const BOUND_ORDER_ERROR_MSG = "The lower bound must less than the
15
15
  char *range_to_s(Range *range, char *field, float boost)
16
16
  {
17
17
  char *buffer, *b;
18
- int flen, llen, ulen;
18
+ size_t flen, llen, ulen;
19
19
 
20
20
  flen = strlen(range->field);
21
21
  llen = range->lower_term ? strlen(range->lower_term) : 0;
@@ -71,6 +71,28 @@ void range_destroy(void *p)
71
71
  free(range);
72
72
  }
73
73
 
74
+ static inline uint range_hash(Range *self)
75
+ {
76
+ return self->include_lower | (self->include_upper << 1) |
77
+ ((str_hash(self->field) ^
78
+ (self->lower_term ? str_hash(self->lower_term) : 0) ^
79
+ (self->upper_term ? str_hash(self->upper_term) : 0)) << 2);
80
+ }
81
+
82
+ static inline int str_eq(char *s1, char *s2)
83
+ {
84
+ return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
85
+ }
86
+
87
+ static inline int range_eq(Range *self, Range *o)
88
+ {
89
+ return (str_eq(self->field, o->field) &&
90
+ str_eq(self->lower_term, o->lower_term) &&
91
+ str_eq(self->upper_term, o->upper_term) &&
92
+ (self->include_lower == o->include_lower) &&
93
+ (self->include_upper == o->include_upper));
94
+ }
95
+
74
96
  Range *range_create(const char *field, char *lower_term, char *upper_term,
75
97
  bool include_lower, bool include_upper)
76
98
  {
@@ -101,9 +123,8 @@ Range *range_create(const char *field, char *lower_term, char *upper_term,
101
123
  *
102
124
  ***************************************************************************/
103
125
 
104
- void rfilt_destroy(void *p)
126
+ void rfilt_destroy(Filter *self)
105
127
  {
106
- Filter *self = (Filter *)p;
107
128
  range_destroy(self->data);
108
129
  filt_destroy(self);
109
130
  }
@@ -111,7 +132,10 @@ void rfilt_destroy(void *p)
111
132
  char *rfilt_to_s(Filter *self)
112
133
  {
113
134
  Range *range = (Range *)self->data;
114
- return range_to_s(range, "", 1.0);
135
+ char *rstr = range_to_s(range, "", 1.0);
136
+ char *rfstr = epstrdup("RangeFilter< %s >", strlen(rstr), rstr);
137
+ free(rstr);
138
+ return rfstr;
115
139
  }
116
140
 
117
141
  BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
@@ -177,6 +201,16 @@ BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
177
201
  return bv;
178
202
  }
179
203
 
204
+ uint rfilt_hash(Filter *self)
205
+ {
206
+ return range_hash((Range *)self->data);
207
+ }
208
+
209
+ int rfilt_eq(Filter *self, Filter *o)
210
+ {
211
+ return range_eq((Range *)self->data, (Range *)o->data);
212
+ }
213
+
180
214
  Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
181
215
  bool include_lower, bool include_upper)
182
216
  {
@@ -187,6 +221,8 @@ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
187
221
  self = filt_create("RangeFilter");
188
222
  self->data = range;
189
223
  self->get_bv = &rfilt_get_bv;
224
+ self->hash = &rfilt_hash;
225
+ self->eq = &rfilt_eq;
190
226
  self->to_s = &rfilt_to_s;
191
227
  self->destroy = &rfilt_destroy;
192
228
  return self;
@@ -204,11 +240,10 @@ char *rq_to_s(Query *self, char *field)
204
240
  return range_to_s(range, field, self->boost);
205
241
  }
206
242
 
207
- void rq_destroy(void *p)
243
+ void rq_destroy(Query *self)
208
244
  {
209
- Query *self = (Query *)p;
210
245
  range_destroy(self->data);
211
- q_destroy(self);
246
+ q_destroy_i(self);
212
247
  }
213
248
 
214
249
  Query *rq_rewrite(Query *self, IndexReader *ir)
@@ -216,8 +251,17 @@ Query *rq_rewrite(Query *self, IndexReader *ir)
216
251
  Range *r = (Range *)self->data;
217
252
  Filter *filter = rfilt_create(r->field, r->lower_term, r->upper_term,
218
253
  r->include_lower, r->include_upper);
219
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
220
- return self->rewritten = csq_create(filter);
254
+ return csq_create(filter);
255
+ }
256
+
257
+ static uint rq_hash(Query *self)
258
+ {
259
+ return range_hash((Range *)self->data);
260
+ }
261
+
262
+ static int rq_eq(Query *self, Query *o)
263
+ {
264
+ return range_eq((Range *)self->data, (Range *)o->data);
221
265
  }
222
266
 
223
267
  Query *rq_create_less(const char *field, char *upper_term, bool include_upper)
@@ -233,17 +277,18 @@ Query *rq_create_more(const char *field, char *lower_term, bool include_lower)
233
277
  Query *rq_create(const char *field, char *lower_term, char *upper_term,
234
278
  bool include_lower, bool include_upper)
235
279
  {
236
- Query *self;
280
+ Query *self = q_create();
237
281
  Range *range = range_create(field, lower_term, upper_term,
238
282
  include_lower, include_upper);
239
283
 
240
- self = q_create();
284
+ self->data = range;
241
285
 
242
286
  self->type = RANGE_QUERY;
243
- self->data = range;
244
- self->create_weight = NULL;
245
287
  self->rewrite = &rq_rewrite;
246
288
  self->to_s = &rq_to_s;
247
- self->destroy = &rq_destroy;
289
+ self->hash = &rq_hash;
290
+ self->eq = &rq_eq;
291
+ self->destroy_i = &rq_destroy;
292
+ self->create_weight_i = &q_create_weight_unsup;
248
293
  return self;
249
294
  }