ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_phrase.c CHANGED
@@ -14,9 +14,13 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
14
14
  Scorer *phsc;
15
15
  PhraseQuery *phq = (PhraseQuery *)self->query->data;
16
16
  int i;
17
- if (phq->t_cnt == 0) return NULL; // optimize zero-term case
17
+ TermDocEnum **tps;
18
+
19
+ if (phq->t_cnt == 0) {
20
+ return NULL; /* optimize zero-term case */
21
+ }
18
22
 
19
- TermDocEnum **tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
23
+ tps = ALLOC_N(TermDocEnum *, phq->t_cnt);
20
24
 
21
25
  for (i = 0; i < phq->t_cnt; i++) {
22
26
  tps[i] = ir_term_positions_for(ir, phq->terms[i]);
@@ -47,6 +51,17 @@ Scorer *phw_scorer(Weight *self, IndexReader *ir)
47
51
 
48
52
  Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
49
53
  {
54
+ Explanation *idf_expl1;
55
+ Explanation *idf_expl2;
56
+ Explanation *query_expl;
57
+ Explanation *qnorm_expl;
58
+ Explanation *field_expl;
59
+ Explanation *tf_expl;
60
+ Scorer *scorer;
61
+ uchar *field_norms;
62
+ float field_norm;
63
+ Explanation *field_norm_expl;
64
+
50
65
  char *query_str = self->query->to_s(self->query, "");
51
66
  PhraseQuery *phq = (PhraseQuery *)self->query->data;
52
67
  int i;
@@ -57,25 +72,25 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
57
72
  strfmt("weight(%s in %d), product of:", query_str, doc_num));
58
73
 
59
74
  for (i = 0; i < phq->t_cnt; i++) {
60
- len += strlen(phq->terms[i]->text) + 30;
75
+ len += (int)strlen(phq->terms[i]->text) + 30;
61
76
  }
62
77
  doc_freqs = ALLOC_N(char, len);
63
78
  for (i = 0; i < phq->t_cnt; i++) {
64
79
  Term *term = phq->terms[i];
65
80
  sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
66
- pos += strlen(doc_freqs + pos);
81
+ pos += (int)strlen(doc_freqs + pos);
67
82
  }
68
83
  pos -= 2; // remove ", " from the end
69
84
  doc_freqs[pos] = 0;
70
85
 
71
- Explanation *idf_expl1 = expl_create(self->idf,
86
+ idf_expl1 = expl_create(self->idf,
72
87
  strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
73
- Explanation *idf_expl2 = expl_create(self->idf,
88
+ idf_expl2 = expl_create(self->idf,
74
89
  strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
75
90
  free(doc_freqs);
76
91
 
77
- // explain query weight
78
- Explanation *query_expl = expl_create(0.0,
92
+ /* explain query weight */
93
+ query_expl = expl_create(0.0,
79
94
  strfmt("query_weight(%s), product of:", query_str));
80
95
 
81
96
  if (self->query->boost != 1.0) {
@@ -83,36 +98,36 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
83
98
  }
84
99
  expl_add_detail(query_expl, idf_expl1);
85
100
 
86
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
101
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
87
102
  expl_add_detail(query_expl, qnorm_expl);
88
103
 
89
104
  query_expl->value = self->query->boost * self->idf * self->qnorm;
90
105
 
91
106
  expl_add_detail(expl, query_expl);
92
107
 
93
- // explain field weight
94
- Explanation *field_expl = expl_create(0.0,
108
+ /* explain field weight */
109
+ field_expl = expl_create(0.0,
95
110
  strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
96
111
  free(query_str);
97
112
 
98
- Scorer *scorer = self->scorer(self, ir);
99
- Explanation *tf_expl = scorer->explain(scorer, doc_num);
113
+ scorer = self->scorer(self, ir);
114
+ tf_expl = scorer->explain(scorer, doc_num);
100
115
  scorer->destroy(scorer);
101
116
  expl_add_detail(field_expl, tf_expl);
102
117
  expl_add_detail(field_expl, idf_expl2);
103
118
 
104
- uchar *field_norms = ir->get_norms(ir, phq->field);
105
- float field_norm = (field_norms != NULL)
119
+ field_norms = ir->get_norms(ir, phq->field);
120
+ field_norm = (field_norms != NULL)
106
121
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
107
- : 0.0;
108
- Explanation *field_norm_expl = expl_create(field_norm,
122
+ : (float)0.0;
123
+ field_norm_expl = expl_create(field_norm,
109
124
  strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
110
125
 
111
126
  expl_add_detail(field_expl, field_norm_expl);
112
127
 
113
128
  field_expl->value = tf_expl->value * self->idf * field_norm;
114
129
 
115
- // combine them
130
+ /* combine them */
116
131
  if (query_expl->value == 1.0) {
117
132
  expl_destoy(expl);
118
133
  return field_expl;
@@ -130,20 +145,15 @@ char *phw_to_s(Weight *self)
130
145
 
131
146
  Weight *phw_create(Query *query, Searcher *searcher)
132
147
  {
148
+ Weight *self = w_create(query);
133
149
  PhraseQuery *phq = (PhraseQuery *)query->data;
134
- Weight *self = ALLOC(Weight);
135
- ZEROSET(self, Weight, 1);
136
- self->get_query = &w_get_query;
137
- self->get_value = &w_get_value;
138
- self->normalize = &w_normalize;
150
+
139
151
  self->scorer = &phw_scorer;
140
152
  self->explain = &phw_explain;
141
153
  self->to_s = &phw_to_s;
142
- self->destroy = &free;
143
154
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
144
155
 
145
156
  self->similarity = query->get_similarity(query, searcher);
146
- self->query = query;
147
157
  self->value = query->boost;
148
158
  self->idf = sim_idf_phrase(self->similarity, phq->terms, phq->t_cnt, searcher);
149
159
 
@@ -158,12 +168,12 @@ Weight *phw_create(Query *query, Searcher *searcher)
158
168
 
159
169
  #define GET_PHQ PhraseQuery *phq = (PhraseQuery *)self->data
160
170
 
161
- void phq_extract_terms(Query *self, Array *terms)
171
+ void phq_extract_terms(Query *self, HashSet *terms)
162
172
  {
163
173
  GET_PHQ;
164
174
  int i;
165
175
  for (i = 0; i < phq->t_cnt; i++) {
166
- ary_append(terms, phq->terms[i]);
176
+ hs_add(terms, term_clone(phq->terms[i]));
167
177
  }
168
178
  }
169
179
 
@@ -173,9 +183,9 @@ char *phq_to_s(Query *self, char *field)
173
183
  int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
174
184
  char *buffer;
175
185
  if (!phq->t_cnt) return NULL;
176
- len = strlen(phq->field) + 1;
186
+ len = (int)strlen(phq->field) + 1;
177
187
  for (i = 0; i < phq->t_cnt; i++) {
178
- len += strlen(phq->terms[i]->text) + 1;
188
+ len += (int)strlen(phq->terms[i]->text) + 1;
179
189
  }
180
190
  // add space for extra characters and boost and slop
181
191
  len += 100 + 3 * phq->positions[phq->t_cnt - 1];
@@ -183,7 +193,7 @@ char *phq_to_s(Query *self, char *field)
183
193
  buffer = ALLOC_N(char, len);
184
194
 
185
195
  if (strcmp(field, phq->field) != 0) {
186
- len = strlen(phq->field);
196
+ len = (int)strlen(phq->field);
187
197
  memcpy(buffer, phq->field, len);
188
198
  buffer[len] = ':';
189
199
  buf_index += len + 1;
@@ -199,7 +209,7 @@ char *phq_to_s(Query *self, char *field)
199
209
  }
200
210
  last_pos = pos;
201
211
 
202
- len = strlen(term->text);
212
+ len = (int)strlen(term->text);
203
213
  memcpy(buffer + buf_index, term->text, len);
204
214
  buf_index += len;
205
215
  buffer[buf_index++] = ' ';
@@ -209,7 +219,7 @@ char *phq_to_s(Query *self, char *field)
209
219
  buffer[buf_index] = 0;
210
220
  if (phq->slop != 0) {
211
221
  sprintf(buffer + buf_index, "~%d", phq->slop);
212
- buf_index += strlen(buffer + buf_index);
222
+ buf_index += (int)strlen(buffer + buf_index);
213
223
  }
214
224
  if (self->boost != 1.0) {
215
225
  buffer[buf_index++] = '^';
@@ -218,10 +228,8 @@ char *phq_to_s(Query *self, char *field)
218
228
  return buffer;
219
229
  }
220
230
 
221
- void phq_destroy(void *p)
231
+ void phq_destroy(Query *self)
222
232
  {
223
- Query *self = (Query *)p;
224
-
225
233
  GET_PHQ;
226
234
  int i;
227
235
  if (self->destroy_all) {
@@ -233,7 +241,7 @@ void phq_destroy(void *p)
233
241
  free(phq->positions);
234
242
  free(phq);
235
243
 
236
- q_destroy(self);
244
+ q_destroy_i(self);
237
245
  }
238
246
 
239
247
  Query *phq_rewrite(Query *self, IndexReader *ir)
@@ -243,9 +251,9 @@ Query *phq_rewrite(Query *self, IndexReader *ir)
243
251
  Term *term = phq->terms[0];
244
252
  Query *tq = tq_create(term_clone(term));
245
253
  tq->boost = self->boost;
246
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
247
- return self->rewritten = tq;
254
+ return tq;
248
255
  } else {
256
+ self->ref_cnt++;
249
257
  return self;
250
258
  }
251
259
  }
@@ -273,22 +281,48 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
273
281
  phq->t_cnt++;
274
282
  }
275
283
 
284
+ static uint phq_hash(Query *self)
285
+ {
286
+ int i;
287
+ uint hash = 0;
288
+ PhraseQuery *phq = (PhraseQuery *)self->data;
289
+ for (i = 0; i < phq->t_cnt; i++) {
290
+ hash = (hash << 1) ^ (term_hash(phq->terms[i]) ^ phq->positions[i]);
291
+ }
292
+ return (hash ^ phq->slop);
293
+ }
294
+
295
+ static int phq_eq(Query *self, Query *o)
296
+ {
297
+ int i;
298
+ PhraseQuery *phq1 = (PhraseQuery *)self->data;
299
+ PhraseQuery *phq2 = (PhraseQuery *)o->data;
300
+ if (phq1->slop != phq2->slop) return false;
301
+ for (i = 0; i < phq1->t_cnt; i++) {
302
+ if (!term_eq(phq1->terms[i], phq2->terms[i]) ||
303
+ (phq1->positions[i] != phq2->positions[i])) return false;
304
+ }
305
+ return true;
306
+ }
307
+
276
308
  Query *phq_create()
277
309
  {
278
310
  Query *self = q_create();
279
- PhraseQuery *phq = ALLOC(PhraseQuery);
280
- ZEROSET(phq, PhraseQuery, 1);
311
+ PhraseQuery *phq = ALLOC_AND_ZERO_N(PhraseQuery, 1);
312
+
281
313
  phq->t_capa = PHQ_INIT_CAPA;
282
314
  phq->terms = ALLOC_N(Term *, PHQ_INIT_CAPA);
283
315
  phq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
284
316
  self->data = phq;
285
317
 
286
- self->create_weight = &phw_create;
318
+ self->type = PHRASE_QUERY;
319
+ self->rewrite = &phq_rewrite;
287
320
  self->extract_terms = &phq_extract_terms;
288
321
  self->to_s = &phq_to_s;
289
- self->destroy = &phq_destroy;
290
- self->rewrite = &phq_rewrite;
291
- self->type = PHRASE_QUERY;
322
+ self->hash = &phq_hash;
323
+ self->eq = &phq_eq;
324
+ self->destroy_i = &phq_destroy;
325
+ self->create_weight_i = &phw_create;
292
326
  return self;
293
327
  }
294
328
 
@@ -376,9 +410,8 @@ bool pp_less_than(void *p1, void *p2)
376
410
  }
377
411
  }
378
412
 
379
- void pp_destroy(void *p)
413
+ void pp_destroy(PhrasePosition *pp)
380
414
  {
381
- PhrasePosition *pp = (PhrasePosition *)p;
382
415
  if (pp->tpe) pp->tpe->close(pp->tpe);
383
416
  free(pp);
384
417
  }
@@ -396,7 +429,7 @@ PhrasePosition *pp_create(TermDocEnum *tpe, int offset)
396
429
  * PhraseScorer
397
430
  ***************************************************************************/
398
431
 
399
- #define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data;
432
+ #define GET_PHSC PhraseScorer *phsc = (PhraseScorer *)self->data
400
433
 
401
434
 
402
435
  void phsc_init(PhraseScorer *phsc)
@@ -484,27 +517,28 @@ bool phsc_skip_to(Scorer *self, int doc_num)
484
517
  return phsc_do_next(self);
485
518
  }
486
519
 
487
- Explanation *phsc_explain(Scorer *self, int doc_num)
520
+ static Explanation *phsc_explain(Scorer *self, int doc_num)
488
521
  {
489
522
  GET_PHSC;
523
+ float phrase_freq;
524
+
490
525
  while (phsc_next(self) && self->doc < doc_num)
491
526
  ;
492
527
 
493
- float phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0;
528
+ phrase_freq = (self->doc == doc_num) ? phsc->freq : (float)0.0;
494
529
  return expl_create(sim_tf(self->similarity, phrase_freq),
495
530
  strfmt("tf(phrase_freq=%f)", phrase_freq));
496
531
  }
497
532
 
498
- void phsc_destroy(void *p)
533
+ static void phsc_destroy(Scorer *self)
499
534
  {
500
- Scorer *self = (Scorer *)p;
501
535
  GET_PHSC;
502
536
  int i;
503
537
  for (i = phsc->pp_cnt - 1; i >= 0; i--) {
504
538
  pp_destroy(phsc->phrase_pos[i]);
505
539
  }
506
540
  free(phsc->phrase_pos);
507
- scorer_destroy(self);
541
+ scorer_destroy_i(self);
508
542
  }
509
543
 
510
544
  Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
data/ext/q_prefix.c CHANGED
@@ -11,8 +11,8 @@ char *prq_to_s(Query *self, char *field)
11
11
  {
12
12
  char *buffer, *bptr;
13
13
  Term *term = (Term *)self->data;
14
- int tlen = strlen(term->text);
15
- int flen = strlen(term->field);
14
+ size_t tlen = strlen(term->text);
15
+ size_t flen = strlen(term->field);
16
16
  bptr = buffer = ALLOC_N(char, tlen + flen + 35);
17
17
 
18
18
  if (strcmp(term->field, field) != 0) {
@@ -33,7 +33,7 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
33
33
  Term *prefix = (Term *)self->data;
34
34
  TermEnum *te = ir->terms_from(ir, prefix);
35
35
  char *prefix_text = prefix->text;
36
- int prefix_length = strlen(prefix_text);
36
+ size_t prefix_length = strlen(prefix_text);
37
37
  char *prefix_field = prefix->field;
38
38
  Query *tq;
39
39
  Query *bq = bq_create(true);
@@ -45,34 +45,45 @@ Query *prq_rewrite(Query *self, IndexReader *ir)
45
45
  strncmp(tb->text, prefix_text, prefix_length) != 0) {
46
46
  break;
47
47
  }
48
- tq = tq_create(term_create(tb->field, tb->text)); // found a match
49
- tq->boost = self->boost; // set the boost
50
- bq_add_query(bq, tq, BC_SHOULD); // add to query
48
+ tq = tq_create(term_create(tb->field, tb->text)); /* found a match */
49
+ tq->boost = self->boost; /* set the boost */
50
+ bq_add_query(bq, tq, BC_SHOULD); /* add to query */
51
51
  } while (te->next(te));
52
52
  XFINALLY
53
53
  te->close(te);
54
54
  XENDTRY
55
55
 
56
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
57
- return self->rewritten = bq;
56
+ return bq;
58
57
  }
59
58
 
60
- void prq_destroy(void *p)
59
+ static void prq_destroy(Query *self)
61
60
  {
62
- Query *self = (Query *)p;
63
61
  if (self->destroy_all) term_destroy((Term *)self->data);
64
- q_destroy(self);
62
+ q_destroy_i(self);
63
+ }
64
+
65
+ static uint prq_hash(Query *self)
66
+ {
67
+ return term_hash((Term *)self->data);
68
+ }
69
+
70
+ static int prq_eq(Query *self, Query *o)
71
+ {
72
+ return term_eq((Term *)self->data, (Term *)o->data);
65
73
  }
66
74
 
67
75
  Query *prefixq_create(Term *prefix)
68
76
  {
69
77
  Query *self = q_create();
70
78
  self->data = prefix;
79
+
71
80
  self->type = PREFIX_QUERY;
72
- self->create_weight = NULL;
73
- self->to_s = &prq_to_s;
74
81
  self->rewrite = &prq_rewrite;
75
- self->destroy = &prq_destroy;
82
+ self->to_s = &prq_to_s;
83
+ self->hash = &prq_hash;
84
+ self->eq = &prq_eq;
85
+ self->destroy_i = &prq_destroy;
86
+ self->create_weight_i = &q_create_weight_unsup;
76
87
 
77
88
  return self;
78
89
  }
data/ext/q_range.c CHANGED
@@ -15,7 +15,7 @@ static char * const BOUND_ORDER_ERROR_MSG = "The lower bound must less than the
15
15
  char *range_to_s(Range *range, char *field, float boost)
16
16
  {
17
17
  char *buffer, *b;
18
- int flen, llen, ulen;
18
+ size_t flen, llen, ulen;
19
19
 
20
20
  flen = strlen(range->field);
21
21
  llen = range->lower_term ? strlen(range->lower_term) : 0;
@@ -71,6 +71,28 @@ void range_destroy(void *p)
71
71
  free(range);
72
72
  }
73
73
 
74
+ static inline uint range_hash(Range *self)
75
+ {
76
+ return self->include_lower | (self->include_upper << 1) |
77
+ ((str_hash(self->field) ^
78
+ (self->lower_term ? str_hash(self->lower_term) : 0) ^
79
+ (self->upper_term ? str_hash(self->upper_term) : 0)) << 2);
80
+ }
81
+
82
+ static inline int str_eq(char *s1, char *s2)
83
+ {
84
+ return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
85
+ }
86
+
87
+ static inline int range_eq(Range *self, Range *o)
88
+ {
89
+ return (str_eq(self->field, o->field) &&
90
+ str_eq(self->lower_term, o->lower_term) &&
91
+ str_eq(self->upper_term, o->upper_term) &&
92
+ (self->include_lower == o->include_lower) &&
93
+ (self->include_upper == o->include_upper));
94
+ }
95
+
74
96
  Range *range_create(const char *field, char *lower_term, char *upper_term,
75
97
  bool include_lower, bool include_upper)
76
98
  {
@@ -101,9 +123,8 @@ Range *range_create(const char *field, char *lower_term, char *upper_term,
101
123
  *
102
124
  ***************************************************************************/
103
125
 
104
- void rfilt_destroy(void *p)
126
+ void rfilt_destroy(Filter *self)
105
127
  {
106
- Filter *self = (Filter *)p;
107
128
  range_destroy(self->data);
108
129
  filt_destroy(self);
109
130
  }
@@ -111,7 +132,10 @@ void rfilt_destroy(void *p)
111
132
  char *rfilt_to_s(Filter *self)
112
133
  {
113
134
  Range *range = (Range *)self->data;
114
- return range_to_s(range, "", 1.0);
135
+ char *rstr = range_to_s(range, "", 1.0);
136
+ char *rfstr = epstrdup("RangeFilter< %s >", strlen(rstr), rstr);
137
+ free(rstr);
138
+ return rfstr;
115
139
  }
116
140
 
117
141
  BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
@@ -177,6 +201,16 @@ BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
177
201
  return bv;
178
202
  }
179
203
 
204
+ uint rfilt_hash(Filter *self)
205
+ {
206
+ return range_hash((Range *)self->data);
207
+ }
208
+
209
+ int rfilt_eq(Filter *self, Filter *o)
210
+ {
211
+ return range_eq((Range *)self->data, (Range *)o->data);
212
+ }
213
+
180
214
  Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
181
215
  bool include_lower, bool include_upper)
182
216
  {
@@ -187,6 +221,8 @@ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
187
221
  self = filt_create("RangeFilter");
188
222
  self->data = range;
189
223
  self->get_bv = &rfilt_get_bv;
224
+ self->hash = &rfilt_hash;
225
+ self->eq = &rfilt_eq;
190
226
  self->to_s = &rfilt_to_s;
191
227
  self->destroy = &rfilt_destroy;
192
228
  return self;
@@ -204,11 +240,10 @@ char *rq_to_s(Query *self, char *field)
204
240
  return range_to_s(range, field, self->boost);
205
241
  }
206
242
 
207
- void rq_destroy(void *p)
243
+ void rq_destroy(Query *self)
208
244
  {
209
- Query *self = (Query *)p;
210
245
  range_destroy(self->data);
211
- q_destroy(self);
246
+ q_destroy_i(self);
212
247
  }
213
248
 
214
249
  Query *rq_rewrite(Query *self, IndexReader *ir)
@@ -216,8 +251,17 @@ Query *rq_rewrite(Query *self, IndexReader *ir)
216
251
  Range *r = (Range *)self->data;
217
252
  Filter *filter = rfilt_create(r->field, r->lower_term, r->upper_term,
218
253
  r->include_lower, r->include_upper);
219
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
220
- return self->rewritten = csq_create(filter);
254
+ return csq_create(filter);
255
+ }
256
+
257
+ static uint rq_hash(Query *self)
258
+ {
259
+ return range_hash((Range *)self->data);
260
+ }
261
+
262
+ static int rq_eq(Query *self, Query *o)
263
+ {
264
+ return range_eq((Range *)self->data, (Range *)o->data);
221
265
  }
222
266
 
223
267
  Query *rq_create_less(const char *field, char *upper_term, bool include_upper)
@@ -233,17 +277,18 @@ Query *rq_create_more(const char *field, char *lower_term, bool include_lower)
233
277
  Query *rq_create(const char *field, char *lower_term, char *upper_term,
234
278
  bool include_lower, bool include_upper)
235
279
  {
236
- Query *self;
280
+ Query *self = q_create();
237
281
  Range *range = range_create(field, lower_term, upper_term,
238
282
  include_lower, include_upper);
239
283
 
240
- self = q_create();
284
+ self->data = range;
241
285
 
242
286
  self->type = RANGE_QUERY;
243
- self->data = range;
244
- self->create_weight = NULL;
245
287
  self->rewrite = &rq_rewrite;
246
288
  self->to_s = &rq_to_s;
247
- self->destroy = &rq_destroy;
289
+ self->hash = &rq_hash;
290
+ self->eq = &rq_eq;
291
+ self->destroy_i = &rq_destroy;
292
+ self->create_weight_i = &q_create_weight_unsup;
248
293
  return self;
249
294
  }