ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_const_score.c CHANGED
@@ -12,11 +12,6 @@ char *csw_to_s(Weight *self)
12
12
  return strfmt("ConstantScoreWeight(%f)", self->value);
13
13
  }
14
14
 
15
- void csw_destroy(void *p)
16
- {
17
- free(p);
18
- }
19
-
20
15
  Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
21
16
  {
22
17
  Filter *filter = (Filter *)self->query->data;
@@ -40,21 +35,14 @@ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
40
35
 
41
36
  Weight *csw_create(Query *query, Searcher *searcher)
42
37
  {
43
- Weight *self = ALLOC(Weight);
44
- ZEROSET(self, Weight, 1);
45
- self->get_query = &w_get_query;
46
- self->get_value = &w_get_value;
47
- self->normalize = &w_normalize;
38
+ Weight *self = w_create(query);
48
39
  self->scorer = &cssc_create;
49
40
  self->explain = &csw_explain;
50
41
  self->to_s = &csw_to_s;
51
- self->destroy = &csw_destroy;
52
42
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
53
43
 
54
44
  self->similarity = query->get_similarity(query, searcher);
55
45
  self->idf = 1.0;
56
- self->query = query;
57
- self->value = 0.0;
58
46
 
59
47
  return self;
60
48
  }
@@ -79,24 +67,36 @@ char *csq_to_s(Query *self, char *field)
79
67
  return buffer;;
80
68
  }
81
69
 
82
- void csq_destroy(void *p)
70
+ void csq_destroy(Query *self)
83
71
  {
84
- Query *self = (Query *)p;
85
72
  if (self->destroy_all) {
86
73
  Filter *filter = (Filter *)self->data;
87
74
  filter->destroy(filter);
88
75
  }
89
- q_destroy(self);
76
+ q_destroy_i(self);
77
+ }
78
+
79
+ static uint csq_hash(Query *self)
80
+ {
81
+ return filt_hash((Filter *)self->data);
82
+ }
83
+
84
+ static int csq_eq(Query *self, Query *o)
85
+ {
86
+ return filt_eq((Filter *)self->data, (Filter *)o->data);
90
87
  }
91
88
 
92
89
  Query *csq_create(Filter *filter)
93
90
  {
94
91
  Query *self = q_create();
95
- self->type = CONSTANT_QUERY;
96
92
  self->data = filter;
97
- self->create_weight = &csw_create;
93
+
94
+ self->type = CONSTANT_QUERY;
98
95
  self->to_s = &csq_to_s;
99
- self->destroy = &csq_destroy;
96
+ self->hash = &csq_hash;
97
+ self->eq = &csq_eq;
98
+ self->destroy_i = &csq_destroy;
99
+ self->create_weight_i = &csw_create;
100
100
 
101
101
  return self;
102
102
  }
@@ -143,6 +143,6 @@ Scorer *cssc_create(Weight *weight, IndexReader *ir)
143
143
  self->next = &cssc_next;
144
144
  self->skip_to = &cssc_skip_to;
145
145
  self->explain = &cssc_explain;
146
- self->destroy = &scorer_destroy;
146
+ self->destroy = &scorer_destroy_i;
147
147
  return self;
148
148
  }
data/ext/q_filtered_query.c CHANGED
@@ -14,11 +14,6 @@ char *fqw_to_s(Weight *self)
14
14
  return strfmt("FilteredQueryWeight(%f)", self->value);
15
15
  }
16
16
 
17
- void fqw_destroy(void *p)
18
- {
19
- free(p);
20
- }
21
-
22
17
  float fqw_sum_of_squared_weights(Weight *self)
23
18
  {
24
19
  Weight *sw = (Weight *)self->data;
@@ -28,7 +23,7 @@ float fqw_sum_of_squared_weights(Weight *self)
28
23
  void fqw_normalize(Weight *self, float normalization_factor)
29
24
  {
30
25
  Weight *sw = (Weight *)self->data;
31
- return sw->normalize(sw, normalization_factor);
26
+ sw->normalize(sw, normalization_factor);
32
27
  }
33
28
 
34
29
  float fqw_get_value(Weight *self)
@@ -52,13 +47,19 @@ Scorer *fqw_scorer(Weight *self, IndexReader *ir)
52
47
  return fqsc_create(scorer, filter->get_bv(filter, ir), self->similarity);
53
48
  }
54
49
 
50
+ void fqw_destroy(Weight *self)
51
+ {
52
+ Weight *sw = (Weight *)self->data;
53
+ sw->destroy(sw);
54
+ w_destroy(self);
55
+ }
56
+
55
57
  Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
56
58
  {
57
- Weight *self = ALLOC(Weight);
58
- ZEROSET(self, Weight, 1);
59
+ Weight *self = w_create(query);
60
+
59
61
  self->data = sub_weight;
60
62
 
61
- self->get_query = &w_get_query;
62
63
  self->get_value = &fqw_get_value;
63
64
  self->normalize = &fqw_normalize;
64
65
  self->scorer = &fqw_scorer;
@@ -69,7 +70,6 @@ Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
69
70
 
70
71
  self->similarity = sim;
71
72
  self->idf = 1.0;
72
- self->query = query;
73
73
  self->value = sub_weight->value;
74
74
 
75
75
  return self;
@@ -99,16 +99,15 @@ char *fq_to_s(Query *self, char *field)
99
99
  return buffer;;
100
100
  }
101
101
 
102
- void fq_destroy(void *p)
102
+ void fq_destroy(Query *self)
103
103
  {
104
- Query *self = (Query *)p;
105
104
  if (self->destroy_all) {
106
105
  FilteredQuery *fq = (FilteredQuery *)self->data;
107
106
  fq->filter->destroy(fq->filter);
108
- fq->query->destroy(fq->query);
107
+ q_deref(fq->query);
109
108
  }
110
109
  free(self->data);
111
- q_destroy(self);
110
+ q_destroy_i(self);
112
111
  }
113
112
 
114
113
  Weight *fq_create_weight(Query *self, Searcher *searcher)
@@ -121,14 +120,16 @@ Weight *fq_create_weight(Query *self, Searcher *searcher)
121
120
  Query *fq_create(Query *query, Filter *filter)
122
121
  {
123
122
  Query *self = q_create();
123
+
124
124
  FilteredQuery *fq = ALLOC(FilteredQuery);
125
125
  fq->query = query;
126
126
  fq->filter = filter;
127
- self->type = FILTERED_QUERY;
128
127
  self->data = fq;
129
- self->create_weight = &fq_create_weight;
128
+
129
+ self->type = FILTERED_QUERY;
130
130
  self->to_s = &fq_to_s;
131
- self->destroy = &fq_destroy;
131
+ self->destroy_i = &fq_destroy;
132
+ self->create_weight_i = &fq_create_weight;
132
133
 
133
134
  return self;
134
135
  }
@@ -180,13 +181,12 @@ Explanation *fqsc_explain(Scorer *self, int doc_num)
180
181
  return sub_sc->explain(sub_sc, doc_num);
181
182
  }
182
183
 
183
- void fqsc_destroy(void *p)
184
+ void fqsc_destroy(Scorer *self)
184
185
  {
185
- Scorer *self = (Scorer *)p;
186
186
  FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
187
187
  bv_destroy(fqsc->bv);
188
188
  fqsc->sub_scorer->destroy(fqsc->sub_scorer);
189
- scorer_destroy(self);
189
+ scorer_destroy_i(self);
190
190
  }
191
191
 
192
192
  Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim)
data/ext/q_fuzzy.c CHANGED
@@ -24,7 +24,7 @@ void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
24
24
  }
25
25
  }
26
26
 
27
- float fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
27
+ int fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
28
28
  {
29
29
  return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
30
30
  : fuzq_calculate_max_distance(fuzq, m);
@@ -34,7 +34,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
34
34
  {
35
35
  int i, j;
36
36
  int max_distance;
37
- int m = strlen(target);
37
+ int m = (int)strlen(target);
38
38
  int n = fuzq->text_len;
39
39
  int *d = fuzq->da;
40
40
  char *text = fuzq->text;
@@ -82,7 +82,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
82
82
  } else {
83
83
  d[i + m*j] = min3(d[i-1 + m*j]+1, d[i + m*(j-1)]+1, d[i-1 + m*(j-1)]);
84
84
  }
85
- best_pos_ed_dist = min(best_pos_ed_dist, d[i + m*j]);
85
+ best_pos_ed_dist = min2(best_pos_ed_dist, d[i + m*j]);
86
86
  }
87
87
  //printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
88
88
 
@@ -104,7 +104,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
104
104
  * number of characters in the shorter word. but this was the formula that
105
105
  * was previously used in FuzzyTermEnum, so it has not been changed (even
106
106
  * though min_sim must be greater than 0.0) */
107
- return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m)));
107
+ return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min2(n, m)));
108
108
  }
109
109
 
110
110
  /****************************************************************************
@@ -118,8 +118,8 @@ char *fuzq_to_s(Query *self, char *field)
118
118
  char *buffer, *bptr;
119
119
  FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
120
120
  Term *term = fuzq->term;
121
- int tlen = strlen(term->text);
122
- int flen = strlen(term->field);
121
+ int tlen = (int)strlen(term->text);
122
+ int flen = (int)strlen(term->field);
123
123
  bptr = buffer = ALLOC_N(char, tlen + flen + 35);
124
124
 
125
125
  if (strcmp(term->field, field) != 0) {
@@ -155,11 +155,10 @@ bool scored_term_less_than(void *p1, void *p2)
155
155
  return (st1->score < st2->score);
156
156
  }
157
157
 
158
- void scored_term_destroy(void *p)
158
+ void scored_term_destroy(ScoredTerm *self)
159
159
  {
160
- ScoredTerm *st = (ScoredTerm *)p;
161
- term_destroy(st->term);
162
- free(st);
160
+ term_destroy(self->term);
161
+ free(self);
163
162
  }
164
163
 
165
164
  ScoredTerm *scored_term_create(Term *term, float score)
@@ -181,7 +180,7 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
181
180
  char *field = term->field;
182
181
  Term prefix_term;
183
182
  prefix_term.field = field;
184
- if (fuzq->pre_len >= strlen(text)) {
183
+ if (fuzq->pre_len >= (int)strlen(text)) {
185
184
  q = tq_create(term_clone(term));
186
185
  } else {
187
186
  PriorityQueue *term_pq;
@@ -195,7 +194,7 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
195
194
 
196
195
  term_pq = pq_create(((BooleanQuery *)q->data)->max_clause_cnt,
197
196
  &scored_term_less_than);
198
- term_pq->free_elem = &scored_term_destroy;
197
+ term_pq->free_elem = (free_ft)&scored_term_destroy;
199
198
 
200
199
  prefix_term.field = field;
201
200
  prefix_term.text = (char *)EMPTY_STRING;
@@ -207,9 +206,9 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
207
206
  }
208
207
  te = ir->terms_from(ir, &prefix_term);
209
208
 
210
- fuzq->scale_factor = 1.0 / (1.0 - fuzq->min_sim);
209
+ fuzq->scale_factor = (float)(1.0 / (1.0 - fuzq->min_sim));
211
210
  fuzq->text = fuzq->term->text + pre_len;
212
- fuzq->text_len = strlen(fuzq->text);
211
+ fuzq->text_len = (int)strlen(fuzq->text);
213
212
  fuzq_initialize_max_distances(fuzq);
214
213
 
215
214
  if (te) {
@@ -246,36 +245,51 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
246
245
  pq_destroy(term_pq);
247
246
  }
248
247
 
249
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
250
- return self->rewritten = q;
248
+ return q;
251
249
  }
252
250
 
253
- void fuzq_destroy(void *p)
251
+ void fuzq_destroy(Query *self)
254
252
  {
255
- Query *self = (Query *)p;
256
253
  FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
257
254
  if (self->destroy_all) term_destroy((Term *)fuzq->term);
258
255
  free(fuzq->da);
259
256
  free(fuzq);
260
- q_destroy(self);
257
+ q_destroy_i(self);
258
+ }
259
+
260
+ static uint fuzq_hash(Query *self)
261
+ {
262
+ FuzzyQuery *fq = (FuzzyQuery *)self->data;
263
+ return term_hash(fq->term) ^ *((int *)&fq->min_sim) ^ fq->pre_len;
264
+ }
265
+
266
+ static int fuzq_eq(Query *self, Query *o)
267
+ {
268
+ FuzzyQuery *fq1 = (FuzzyQuery *)self->data;
269
+ FuzzyQuery *fq2 = (FuzzyQuery *)o->data;
270
+ return term_eq(fq1->term, fq2->term) &&
271
+ (fq1->pre_len == fq2->pre_len) &&
272
+ (fq1->min_sim == fq2->min_sim);
261
273
  }
262
274
 
263
275
  Query *fuzq_create(Term *term)
264
276
  {
265
277
  Query *self = q_create();
278
+
266
279
  FuzzyQuery *fq = ALLOC(FuzzyQuery);
267
280
  ZEROSET(fq, FuzzyQuery, 1);
268
-
269
281
  fq->term = term;
270
282
  fq->pre_len = DEF_PRE_LEN;
271
283
  fq->min_sim = DEF_MIN_SIM;
272
284
  self->data = fq;
285
+
273
286
  self->type = FUZZY_QUERY;
274
- self->create_weight = NULL;
275
287
  self->to_s = &fuzq_to_s;
288
+ self->hash = &fuzq_hash;
289
+ self->eq = &fuzq_eq;
276
290
  self->rewrite = &fuzq_rewrite;
277
- self->destroy = &fuzq_destroy;
278
- self->rewritten = NULL;
291
+ self->destroy_i = &fuzq_destroy;
292
+ self->create_weight_i = &q_create_weight_unsup;
279
293
 
280
294
  return self;
281
295
  }
data/ext/q_match_all.c CHANGED
@@ -12,11 +12,6 @@ char *maw_to_s(Weight *self)
12
12
  return strfmt("MatchAllWeight(%f)", self->value);
13
13
  }
14
14
 
15
- void maw_destroy(void *p)
16
- {
17
- free(p);
18
- }
19
-
20
15
  Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
21
16
  {
22
17
  Explanation *expl;
@@ -34,21 +29,15 @@ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
34
29
 
35
30
  Weight *maw_create(Query *query, Searcher *searcher)
36
31
  {
37
- Weight *self = ALLOC(Weight);
38
- ZEROSET(self, Weight, 1);
39
- self->get_query = &w_get_query;
40
- self->get_value = &w_get_value;
41
- self->normalize = &w_normalize;
32
+ Weight *self = w_create(query);
33
+
42
34
  self->scorer = &masc_create;
43
35
  self->explain = &maw_explain;
44
36
  self->to_s = &maw_to_s;
45
- self->destroy = &maw_destroy;
46
37
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
47
38
 
48
39
  self->similarity = query->get_similarity(query, searcher);
49
40
  self->idf = 1.0;
50
- self->query = query;
51
- self->value = 0.0;
52
41
 
53
42
  return self;
54
43
  }
@@ -68,19 +57,26 @@ char *maq_to_s(Query *self, char *field)
68
57
  }
69
58
  }
70
59
 
71
- void maq_destroy(void *p)
60
+ static uint maq_hash(Query *self)
72
61
  {
73
- Query *self = (Query *)p;
74
- q_destroy(self);
62
+ return 0;
63
+ }
64
+
65
+ static int maq_eq(Query *self, Query *o)
66
+ {
67
+ return true;
75
68
  }
76
69
 
77
70
  Query *maq_create()
78
71
  {
79
72
  Query *self = q_create();
73
+
80
74
  self->type = MATCH_ALL_QUERY;
81
- self->create_weight = &maw_create;
82
75
  self->to_s = &maq_to_s;
83
- self->destroy = &maq_destroy;
76
+ self->hash = &maq_hash;
77
+ self->eq = &maq_eq;
78
+ self->destroy_i = &q_destroy_i;
79
+ self->create_weight_i = &maw_create;
84
80
 
85
81
  return self;
86
82
  }
@@ -133,6 +129,6 @@ Scorer *masc_create(Weight *weight, IndexReader *ir)
133
129
  self->next = &masc_next;
134
130
  self->skip_to = &masc_skip_to;
135
131
  self->explain = &masc_explain;
136
- self->destroy = &scorer_destroy;
132
+ self->destroy = &scorer_destroy_i;
137
133
  return self;
138
134
  }
data/ext/q_multi_phrase.c CHANGED
@@ -1,5 +1,5 @@
1
- #include <string.h>
2
1
  #include "search.h"
2
+ #include <string.h>
3
3
 
4
4
  static char * const FIELD_CHANGE_ERROR_MSG = "All phrase terms must be in the same field.";
5
5
 
@@ -19,9 +19,13 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
19
19
  Scorer *phsc;
20
20
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
21
21
  int i;
22
- if (mphq->t_cnt == 0) return NULL; // optimize zero-term case
22
+ TermDocEnum **tps;
23
+
24
+ if (mphq->t_cnt == 0) {
25
+ return NULL; /* optimize zero-term case */
26
+ }
23
27
 
24
- TermDocEnum **tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
28
+ tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
25
29
 
26
30
  for (i = 0; i < mphq->t_cnt; i++) {
27
31
  if (mphq->pt_cnt[i] == 1) {
@@ -56,6 +60,17 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
56
60
 
57
61
  Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
58
62
  {
63
+ Explanation *idf_expl1;
64
+ Explanation *idf_expl2;
65
+ Explanation *query_expl;
66
+ Explanation *qnorm_expl;
67
+ Explanation *field_expl;
68
+ Explanation *tf_expl;
69
+ Scorer *scorer;
70
+ uchar *field_norms;
71
+ float field_norm;
72
+ Explanation *field_norm_expl;
73
+
59
74
  char *query_str = self->query->to_s(self->query, "");
60
75
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
61
76
  int i, j;
@@ -67,7 +82,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
67
82
 
68
83
  for (i = 0; i < mphq->t_cnt; i++) {
69
84
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
70
- len += strlen(mphq->terms[i][j]->text) + 30;
85
+ len += (int)strlen(mphq->terms[i][j]->text) + 30;
71
86
  }
72
87
  }
73
88
  doc_freqs = ALLOC_N(char, len);
@@ -75,20 +90,20 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
75
90
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
76
91
  Term *term = mphq->terms[i][j];
77
92
  sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
78
- pos += strlen(doc_freqs + pos);
93
+ pos += (int)strlen(doc_freqs + pos);
79
94
  }
80
95
  }
81
96
  pos -= 2; // remove ", " from the end
82
97
  doc_freqs[pos] = 0;
83
98
 
84
- Explanation *idf_expl1 = expl_create(self->idf,
99
+ idf_expl1 = expl_create(self->idf,
85
100
  strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
86
- Explanation *idf_expl2 = expl_create(self->idf,
101
+ idf_expl2 = expl_create(self->idf,
87
102
  strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
88
103
  free(doc_freqs);
89
104
 
90
- // explain query weight
91
- Explanation *query_expl = expl_create(0.0,
105
+ /* explain query weight */
106
+ query_expl = expl_create(0.0,
92
107
  strfmt("query_weight(%s), product of:", query_str));
93
108
 
94
109
  if (self->query->boost != 1.0) {
@@ -96,36 +111,36 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
96
111
  }
97
112
  expl_add_detail(query_expl, idf_expl1);
98
113
 
99
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
114
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
100
115
  expl_add_detail(query_expl, qnorm_expl);
101
116
 
102
117
  query_expl->value = self->query->boost * self->idf * self->qnorm;
103
118
 
104
119
  expl_add_detail(expl, query_expl);
105
120
 
106
- // explain field weight
107
- Explanation *field_expl = expl_create(0.0,
121
+ /* explain field weight */
122
+ field_expl = expl_create(0.0,
108
123
  strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
109
124
  free(query_str);
110
125
 
111
- Scorer *scorer = self->scorer(self, ir);
112
- Explanation *tf_expl = scorer->explain(scorer, doc_num);
126
+ scorer = self->scorer(self, ir);
127
+ tf_expl = scorer->explain(scorer, doc_num);
113
128
  scorer->destroy(scorer);
114
129
  expl_add_detail(field_expl, tf_expl);
115
130
  expl_add_detail(field_expl, idf_expl2);
116
131
 
117
- uchar *field_norms = ir->get_norms(ir, mphq->field);
118
- float field_norm = (field_norms != NULL)
132
+ field_norms = ir->get_norms(ir, mphq->field);
133
+ field_norm = (field_norms != NULL)
119
134
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
120
- : 0.0;
121
- Explanation *field_norm_expl = expl_create(field_norm,
135
+ : (float)0.0;
136
+ field_norm_expl = expl_create(field_norm,
122
137
  strfmt("field_norm(field=%s, doc=%d)", mphq->field, doc_num));
123
138
 
124
139
  expl_add_detail(field_expl, field_norm_expl);
125
140
 
126
141
  field_expl->value = tf_expl->value * self->idf * field_norm;
127
142
 
128
- // combine them
143
+ /* combine them */
129
144
  if (query_expl->value == 1.0) {
130
145
  expl_destoy(expl);
131
146
  return field_expl;
@@ -136,29 +151,23 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
136
151
  }
137
152
  }
138
153
 
139
-
140
-
141
-
142
154
  Weight *mphw_create(Query *query, Searcher *searcher)
143
155
  {
156
+ Weight *self = w_create(query);
157
+
144
158
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)query->data;
145
- Weight *self = ALLOC(Weight);
146
159
  int i, j;
147
- ZEROSET(self, Weight, 1);
148
- self->get_query = &w_get_query;
149
- self->get_value = &w_get_value;
150
- self->normalize = &w_normalize;
160
+
151
161
  self->scorer = &mphw_scorer;
152
162
  self->explain = &mphw_explain;
153
163
  self->to_s = &mphw_to_s;
154
- self->destroy = &free;
155
164
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
156
165
 
157
166
  self->similarity = query->get_similarity(query, searcher);
158
167
  self->query = query;
159
168
  self->value = query->boost;
160
-
161
169
  self->idf = 0.0;
170
+
162
171
  for (i = 0; i < mphq->t_cnt; i++) {
163
172
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
164
173
  self->idf += sim_idf_term(self->similarity, mphq->terms[i][j], searcher);
@@ -209,10 +218,8 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
209
218
  mphq->t_cnt++;
210
219
  }
211
220
 
212
- void mphq_destroy(void *p)
221
+ void mphq_destroy(Query *self)
213
222
  {
214
- Query *self = (Query *)p;
215
-
216
223
  GET_MPHQ;
217
224
  int i, j;
218
225
  if (self->destroy_all) {
@@ -228,16 +235,16 @@ void mphq_destroy(void *p)
228
235
  free(mphq->pt_cnt);
229
236
  free(mphq);
230
237
 
231
- q_destroy(self);
238
+ q_destroy_i(self);
232
239
  }
233
240
 
234
- void mphq_extract_terms(Query *self, Array *terms)
241
+ void mphq_extract_terms(Query *self, HashSet *terms)
235
242
  {
236
243
  GET_MPHQ;
237
244
  int i, j;
238
245
  for (i = 0; i < mphq->t_cnt; i++) {
239
246
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
240
- ary_append(terms, mphq->terms[i][j]);
247
+ hs_add(terms, term_clone(mphq->terms[i][j]));
241
248
  }
242
249
  }
243
250
  }
@@ -248,10 +255,10 @@ char *mphq_to_s(Query *self, char *field)
248
255
  int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
249
256
  char *buffer;
250
257
  if (!mphq->t_cnt) return NULL;
251
- len = strlen(mphq->field) + 1;
258
+ len = (int)strlen(mphq->field) + 1;
252
259
  for (i = 0; i < mphq->t_cnt; i++) {
253
260
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
254
- len += strlen(mphq->terms[i][j]->text) + 1;
261
+ len += (int)strlen(mphq->terms[i][j]->text) + 1;
255
262
  }
256
263
  }
257
264
 
@@ -261,7 +268,7 @@ char *mphq_to_s(Query *self, char *field)
261
268
  buffer = ALLOC_N(char, len);
262
269
 
263
270
  if (strcmp(field, mphq->field) != 0) {
264
- len = strlen(mphq->field);
271
+ len = (int)strlen(mphq->field);
265
272
  memcpy(buffer, mphq->field, len);
266
273
  buffer[len] = ':';
267
274
  buf_index += len + 1;
@@ -278,7 +285,7 @@ char *mphq_to_s(Query *self, char *field)
278
285
 
279
286
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
280
287
  Term *term = mphq->terms[i][j];
281
- len = strlen(term->text);
288
+ len = (int)strlen(term->text);
282
289
  memcpy(buffer + buf_index, term->text, len);
283
290
  buf_index += len;
284
291
  buffer[buf_index++] = '|';
@@ -291,7 +298,7 @@ char *mphq_to_s(Query *self, char *field)
291
298
  buffer[buf_index] = 0;
292
299
  if (mphq->slop != 0) {
293
300
  sprintf(buffer + buf_index, "~%d", mphq->slop);
294
- buf_index += strlen(buffer + buf_index);
301
+ buf_index += (int)strlen(buffer + buf_index);
295
302
  }
296
303
  if (self->boost != 1.0) {
297
304
  buffer[buf_index] = '^';
@@ -311,16 +318,48 @@ Query *mphq_rewrite(Query *self, IndexReader *ir)
311
318
  bq_add_query(bq, tq_create(term_clone(terms[i])), BC_SHOULD);
312
319
  }
313
320
  bq->boost = self->boost;
314
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
315
- return self->rewritten = bq;
321
+ return bq;
316
322
  } else {
323
+ self->ref_cnt++;
317
324
  return self;
318
325
  }
319
326
  }
320
327
 
328
+ static uint mphq_hash(Query *self)
329
+ {
330
+ int i, j;
331
+ uint hash = 0;
332
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->data;
333
+ for (i = 0; i < mphq->t_cnt; i++) {
334
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
335
+ hash ^= (term_hash(mphq->terms[i][j]) ^ mphq->positions[i]);
336
+ }
337
+ hash <<= 1;
338
+ }
339
+ return (hash ^ mphq->slop);
340
+ }
341
+
342
+ static int mphq_eq(Query *self, Query *o)
343
+ {
344
+ int i, j;
345
+ MultiPhraseQuery *mphq1 = (MultiPhraseQuery *)self->data;
346
+ MultiPhraseQuery *mphq2 = (MultiPhraseQuery *)o->data;
347
+ if (mphq1->slop != mphq2->slop) return false;
348
+ for (i = 0; i < mphq1->t_cnt; i++) {
349
+ if ((mphq1->pt_cnt[i] != mphq2->pt_cnt[i]) ||
350
+ (mphq1->positions[i] != mphq2->positions[i])) return false;
351
+
352
+ for (j = 0; j < mphq1->pt_cnt[i]; j++) {
353
+ if (!term_eq(mphq1->terms[i][j], mphq2->terms[i][j])) return false;
354
+ }
355
+ }
356
+ return true;
357
+ }
358
+
321
359
  Query *mphq_create()
322
360
  {
323
361
  Query *self = q_create();
362
+
324
363
  MultiPhraseQuery *mphq = ALLOC(MultiPhraseQuery);
325
364
  ZEROSET(mphq, MultiPhraseQuery, 1);
326
365
  mphq->t_capa = PHQ_INIT_CAPA;
@@ -329,11 +368,13 @@ Query *mphq_create()
329
368
  mphq->pt_cnt = ALLOC_N(int, PHQ_INIT_CAPA);
330
369
  self->data = mphq;
331
370
 
332
- self->create_weight = &mphw_create;
371
+ self->type = MULTI_PHRASE_QUERY;
372
+ self->rewrite = &mphq_rewrite;
333
373
  self->extract_terms = &mphq_extract_terms;
334
374
  self->to_s = &mphq_to_s;
335
- self->destroy = &mphq_destroy;
336
- self->rewrite = &mphq_rewrite;
337
- self->type = PHRASE_QUERY;
375
+ self->hash = &mphq_hash;
376
+ self->eq = &mphq_eq;
377
+ self->destroy_i = &mphq_destroy;
378
+ self->create_weight_i = &mphw_create;
338
379
  return self;
339
380
  }