ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_const_score.c CHANGED
@@ -12,11 +12,6 @@ char *csw_to_s(Weight *self)
12
12
  return strfmt("ConstantScoreWeight(%f)", self->value);
13
13
  }
14
14
 
15
- void csw_destroy(void *p)
16
- {
17
- free(p);
18
- }
19
-
20
15
  Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
21
16
  {
22
17
  Filter *filter = (Filter *)self->query->data;
@@ -40,21 +35,14 @@ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
40
35
 
41
36
  Weight *csw_create(Query *query, Searcher *searcher)
42
37
  {
43
- Weight *self = ALLOC(Weight);
44
- ZEROSET(self, Weight, 1);
45
- self->get_query = &w_get_query;
46
- self->get_value = &w_get_value;
47
- self->normalize = &w_normalize;
38
+ Weight *self = w_create(query);
48
39
  self->scorer = &cssc_create;
49
40
  self->explain = &csw_explain;
50
41
  self->to_s = &csw_to_s;
51
- self->destroy = &csw_destroy;
52
42
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
53
43
 
54
44
  self->similarity = query->get_similarity(query, searcher);
55
45
  self->idf = 1.0;
56
- self->query = query;
57
- self->value = 0.0;
58
46
 
59
47
  return self;
60
48
  }
@@ -79,24 +67,36 @@ char *csq_to_s(Query *self, char *field)
79
67
  return buffer;;
80
68
  }
81
69
 
82
- void csq_destroy(void *p)
70
+ void csq_destroy(Query *self)
83
71
  {
84
- Query *self = (Query *)p;
85
72
  if (self->destroy_all) {
86
73
  Filter *filter = (Filter *)self->data;
87
74
  filter->destroy(filter);
88
75
  }
89
- q_destroy(self);
76
+ q_destroy_i(self);
77
+ }
78
+
79
+ static uint csq_hash(Query *self)
80
+ {
81
+ return filt_hash((Filter *)self->data);
82
+ }
83
+
84
+ static int csq_eq(Query *self, Query *o)
85
+ {
86
+ return filt_eq((Filter *)self->data, (Filter *)o->data);
90
87
  }
91
88
 
92
89
  Query *csq_create(Filter *filter)
93
90
  {
94
91
  Query *self = q_create();
95
- self->type = CONSTANT_QUERY;
96
92
  self->data = filter;
97
- self->create_weight = &csw_create;
93
+
94
+ self->type = CONSTANT_QUERY;
98
95
  self->to_s = &csq_to_s;
99
- self->destroy = &csq_destroy;
96
+ self->hash = &csq_hash;
97
+ self->eq = &csq_eq;
98
+ self->destroy_i = &csq_destroy;
99
+ self->create_weight_i = &csw_create;
100
100
 
101
101
  return self;
102
102
  }
@@ -143,6 +143,6 @@ Scorer *cssc_create(Weight *weight, IndexReader *ir)
143
143
  self->next = &cssc_next;
144
144
  self->skip_to = &cssc_skip_to;
145
145
  self->explain = &cssc_explain;
146
- self->destroy = &scorer_destroy;
146
+ self->destroy = &scorer_destroy_i;
147
147
  return self;
148
148
  }
data/ext/q_filtered_query.c CHANGED
@@ -14,11 +14,6 @@ char *fqw_to_s(Weight *self)
14
14
  return strfmt("FilteredQueryWeight(%f)", self->value);
15
15
  }
16
16
 
17
- void fqw_destroy(void *p)
18
- {
19
- free(p);
20
- }
21
-
22
17
  float fqw_sum_of_squared_weights(Weight *self)
23
18
  {
24
19
  Weight *sw = (Weight *)self->data;
@@ -28,7 +23,7 @@ float fqw_sum_of_squared_weights(Weight *self)
28
23
  void fqw_normalize(Weight *self, float normalization_factor)
29
24
  {
30
25
  Weight *sw = (Weight *)self->data;
31
- return sw->normalize(sw, normalization_factor);
26
+ sw->normalize(sw, normalization_factor);
32
27
  }
33
28
 
34
29
  float fqw_get_value(Weight *self)
@@ -52,13 +47,19 @@ Scorer *fqw_scorer(Weight *self, IndexReader *ir)
52
47
  return fqsc_create(scorer, filter->get_bv(filter, ir), self->similarity);
53
48
  }
54
49
 
50
+ void fqw_destroy(Weight *self)
51
+ {
52
+ Weight *sw = (Weight *)self->data;
53
+ sw->destroy(sw);
54
+ w_destroy(self);
55
+ }
56
+
55
57
  Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
56
58
  {
57
- Weight *self = ALLOC(Weight);
58
- ZEROSET(self, Weight, 1);
59
+ Weight *self = w_create(query);
60
+
59
61
  self->data = sub_weight;
60
62
 
61
- self->get_query = &w_get_query;
62
63
  self->get_value = &fqw_get_value;
63
64
  self->normalize = &fqw_normalize;
64
65
  self->scorer = &fqw_scorer;
@@ -69,7 +70,6 @@ Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
69
70
 
70
71
  self->similarity = sim;
71
72
  self->idf = 1.0;
72
- self->query = query;
73
73
  self->value = sub_weight->value;
74
74
 
75
75
  return self;
@@ -99,16 +99,15 @@ char *fq_to_s(Query *self, char *field)
99
99
  return buffer;;
100
100
  }
101
101
 
102
- void fq_destroy(void *p)
102
+ void fq_destroy(Query *self)
103
103
  {
104
- Query *self = (Query *)p;
105
104
  if (self->destroy_all) {
106
105
  FilteredQuery *fq = (FilteredQuery *)self->data;
107
106
  fq->filter->destroy(fq->filter);
108
- fq->query->destroy(fq->query);
107
+ q_deref(fq->query);
109
108
  }
110
109
  free(self->data);
111
- q_destroy(self);
110
+ q_destroy_i(self);
112
111
  }
113
112
 
114
113
  Weight *fq_create_weight(Query *self, Searcher *searcher)
@@ -121,14 +120,16 @@ Weight *fq_create_weight(Query *self, Searcher *searcher)
121
120
  Query *fq_create(Query *query, Filter *filter)
122
121
  {
123
122
  Query *self = q_create();
123
+
124
124
  FilteredQuery *fq = ALLOC(FilteredQuery);
125
125
  fq->query = query;
126
126
  fq->filter = filter;
127
- self->type = FILTERED_QUERY;
128
127
  self->data = fq;
129
- self->create_weight = &fq_create_weight;
128
+
129
+ self->type = FILTERED_QUERY;
130
130
  self->to_s = &fq_to_s;
131
- self->destroy = &fq_destroy;
131
+ self->destroy_i = &fq_destroy;
132
+ self->create_weight_i = &fq_create_weight;
132
133
 
133
134
  return self;
134
135
  }
@@ -180,13 +181,12 @@ Explanation *fqsc_explain(Scorer *self, int doc_num)
180
181
  return sub_sc->explain(sub_sc, doc_num);
181
182
  }
182
183
 
183
- void fqsc_destroy(void *p)
184
+ void fqsc_destroy(Scorer *self)
184
185
  {
185
- Scorer *self = (Scorer *)p;
186
186
  FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
187
187
  bv_destroy(fqsc->bv);
188
188
  fqsc->sub_scorer->destroy(fqsc->sub_scorer);
189
- scorer_destroy(self);
189
+ scorer_destroy_i(self);
190
190
  }
191
191
 
192
192
  Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim)
data/ext/q_fuzzy.c CHANGED
@@ -24,7 +24,7 @@ void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
24
24
  }
25
25
  }
26
26
 
27
- float fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
27
+ int fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
28
28
  {
29
29
  return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
30
30
  : fuzq_calculate_max_distance(fuzq, m);
@@ -34,7 +34,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
34
34
  {
35
35
  int i, j;
36
36
  int max_distance;
37
- int m = strlen(target);
37
+ int m = (int)strlen(target);
38
38
  int n = fuzq->text_len;
39
39
  int *d = fuzq->da;
40
40
  char *text = fuzq->text;
@@ -82,7 +82,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
82
82
  } else {
83
83
  d[i + m*j] = min3(d[i-1 + m*j]+1, d[i + m*(j-1)]+1, d[i-1 + m*(j-1)]);
84
84
  }
85
- best_pos_ed_dist = min(best_pos_ed_dist, d[i + m*j]);
85
+ best_pos_ed_dist = min2(best_pos_ed_dist, d[i + m*j]);
86
86
  }
87
87
  //printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
88
88
 
@@ -104,7 +104,7 @@ float fuzq_score(FuzzyQuery *fuzq, char *target)
104
104
  * number of characters in the shorter word. but this was the formula that
105
105
  * was previously used in FuzzyTermEnum, so it has not been changed (even
106
106
  * though min_sim must be greater than 0.0) */
107
- return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m)));
107
+ return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min2(n, m)));
108
108
  }
109
109
 
110
110
  /****************************************************************************
@@ -118,8 +118,8 @@ char *fuzq_to_s(Query *self, char *field)
118
118
  char *buffer, *bptr;
119
119
  FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
120
120
  Term *term = fuzq->term;
121
- int tlen = strlen(term->text);
122
- int flen = strlen(term->field);
121
+ int tlen = (int)strlen(term->text);
122
+ int flen = (int)strlen(term->field);
123
123
  bptr = buffer = ALLOC_N(char, tlen + flen + 35);
124
124
 
125
125
  if (strcmp(term->field, field) != 0) {
@@ -155,11 +155,10 @@ bool scored_term_less_than(void *p1, void *p2)
155
155
  return (st1->score < st2->score);
156
156
  }
157
157
 
158
- void scored_term_destroy(void *p)
158
+ void scored_term_destroy(ScoredTerm *self)
159
159
  {
160
- ScoredTerm *st = (ScoredTerm *)p;
161
- term_destroy(st->term);
162
- free(st);
160
+ term_destroy(self->term);
161
+ free(self);
163
162
  }
164
163
 
165
164
  ScoredTerm *scored_term_create(Term *term, float score)
@@ -181,7 +180,7 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
181
180
  char *field = term->field;
182
181
  Term prefix_term;
183
182
  prefix_term.field = field;
184
- if (fuzq->pre_len >= strlen(text)) {
183
+ if (fuzq->pre_len >= (int)strlen(text)) {
185
184
  q = tq_create(term_clone(term));
186
185
  } else {
187
186
  PriorityQueue *term_pq;
@@ -195,7 +194,7 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
195
194
 
196
195
  term_pq = pq_create(((BooleanQuery *)q->data)->max_clause_cnt,
197
196
  &scored_term_less_than);
198
- term_pq->free_elem = &scored_term_destroy;
197
+ term_pq->free_elem = (free_ft)&scored_term_destroy;
199
198
 
200
199
  prefix_term.field = field;
201
200
  prefix_term.text = (char *)EMPTY_STRING;
@@ -207,9 +206,9 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
207
206
  }
208
207
  te = ir->terms_from(ir, &prefix_term);
209
208
 
210
- fuzq->scale_factor = 1.0 / (1.0 - fuzq->min_sim);
209
+ fuzq->scale_factor = (float)(1.0 / (1.0 - fuzq->min_sim));
211
210
  fuzq->text = fuzq->term->text + pre_len;
212
- fuzq->text_len = strlen(fuzq->text);
211
+ fuzq->text_len = (int)strlen(fuzq->text);
213
212
  fuzq_initialize_max_distances(fuzq);
214
213
 
215
214
  if (te) {
@@ -246,36 +245,51 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
246
245
  pq_destroy(term_pq);
247
246
  }
248
247
 
249
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
250
- return self->rewritten = q;
248
+ return q;
251
249
  }
252
250
 
253
- void fuzq_destroy(void *p)
251
+ void fuzq_destroy(Query *self)
254
252
  {
255
- Query *self = (Query *)p;
256
253
  FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
257
254
  if (self->destroy_all) term_destroy((Term *)fuzq->term);
258
255
  free(fuzq->da);
259
256
  free(fuzq);
260
- q_destroy(self);
257
+ q_destroy_i(self);
258
+ }
259
+
260
+ static uint fuzq_hash(Query *self)
261
+ {
262
+ FuzzyQuery *fq = (FuzzyQuery *)self->data;
263
+ return term_hash(fq->term) ^ *((int *)&fq->min_sim) ^ fq->pre_len;
264
+ }
265
+
266
+ static int fuzq_eq(Query *self, Query *o)
267
+ {
268
+ FuzzyQuery *fq1 = (FuzzyQuery *)self->data;
269
+ FuzzyQuery *fq2 = (FuzzyQuery *)o->data;
270
+ return term_eq(fq1->term, fq2->term) &&
271
+ (fq1->pre_len == fq2->pre_len) &&
272
+ (fq1->min_sim == fq2->min_sim);
261
273
  }
262
274
 
263
275
  Query *fuzq_create(Term *term)
264
276
  {
265
277
  Query *self = q_create();
278
+
266
279
  FuzzyQuery *fq = ALLOC(FuzzyQuery);
267
280
  ZEROSET(fq, FuzzyQuery, 1);
268
-
269
281
  fq->term = term;
270
282
  fq->pre_len = DEF_PRE_LEN;
271
283
  fq->min_sim = DEF_MIN_SIM;
272
284
  self->data = fq;
285
+
273
286
  self->type = FUZZY_QUERY;
274
- self->create_weight = NULL;
275
287
  self->to_s = &fuzq_to_s;
288
+ self->hash = &fuzq_hash;
289
+ self->eq = &fuzq_eq;
276
290
  self->rewrite = &fuzq_rewrite;
277
- self->destroy = &fuzq_destroy;
278
- self->rewritten = NULL;
291
+ self->destroy_i = &fuzq_destroy;
292
+ self->create_weight_i = &q_create_weight_unsup;
279
293
 
280
294
  return self;
281
295
  }
data/ext/q_match_all.c CHANGED
@@ -12,11 +12,6 @@ char *maw_to_s(Weight *self)
12
12
  return strfmt("MatchAllWeight(%f)", self->value);
13
13
  }
14
14
 
15
- void maw_destroy(void *p)
16
- {
17
- free(p);
18
- }
19
-
20
15
  Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
21
16
  {
22
17
  Explanation *expl;
@@ -34,21 +29,15 @@ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
34
29
 
35
30
  Weight *maw_create(Query *query, Searcher *searcher)
36
31
  {
37
- Weight *self = ALLOC(Weight);
38
- ZEROSET(self, Weight, 1);
39
- self->get_query = &w_get_query;
40
- self->get_value = &w_get_value;
41
- self->normalize = &w_normalize;
32
+ Weight *self = w_create(query);
33
+
42
34
  self->scorer = &masc_create;
43
35
  self->explain = &maw_explain;
44
36
  self->to_s = &maw_to_s;
45
- self->destroy = &maw_destroy;
46
37
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
47
38
 
48
39
  self->similarity = query->get_similarity(query, searcher);
49
40
  self->idf = 1.0;
50
- self->query = query;
51
- self->value = 0.0;
52
41
 
53
42
  return self;
54
43
  }
@@ -68,19 +57,26 @@ char *maq_to_s(Query *self, char *field)
68
57
  }
69
58
  }
70
59
 
71
- void maq_destroy(void *p)
60
+ static uint maq_hash(Query *self)
72
61
  {
73
- Query *self = (Query *)p;
74
- q_destroy(self);
62
+ return 0;
63
+ }
64
+
65
+ static int maq_eq(Query *self, Query *o)
66
+ {
67
+ return true;
75
68
  }
76
69
 
77
70
  Query *maq_create()
78
71
  {
79
72
  Query *self = q_create();
73
+
80
74
  self->type = MATCH_ALL_QUERY;
81
- self->create_weight = &maw_create;
82
75
  self->to_s = &maq_to_s;
83
- self->destroy = &maq_destroy;
76
+ self->hash = &maq_hash;
77
+ self->eq = &maq_eq;
78
+ self->destroy_i = &q_destroy_i;
79
+ self->create_weight_i = &maw_create;
84
80
 
85
81
  return self;
86
82
  }
@@ -133,6 +129,6 @@ Scorer *masc_create(Weight *weight, IndexReader *ir)
133
129
  self->next = &masc_next;
134
130
  self->skip_to = &masc_skip_to;
135
131
  self->explain = &masc_explain;
136
- self->destroy = &scorer_destroy;
132
+ self->destroy = &scorer_destroy_i;
137
133
  return self;
138
134
  }
data/ext/q_multi_phrase.c CHANGED
@@ -1,5 +1,5 @@
1
- #include <string.h>
2
1
  #include "search.h"
2
+ #include <string.h>
3
3
 
4
4
  static char * const FIELD_CHANGE_ERROR_MSG = "All phrase terms must be in the same field.";
5
5
 
@@ -19,9 +19,13 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
19
19
  Scorer *phsc;
20
20
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
21
21
  int i;
22
- if (mphq->t_cnt == 0) return NULL; // optimize zero-term case
22
+ TermDocEnum **tps;
23
+
24
+ if (mphq->t_cnt == 0) {
25
+ return NULL; /* optimize zero-term case */
26
+ }
23
27
 
24
- TermDocEnum **tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
28
+ tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
25
29
 
26
30
  for (i = 0; i < mphq->t_cnt; i++) {
27
31
  if (mphq->pt_cnt[i] == 1) {
@@ -56,6 +60,17 @@ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
56
60
 
57
61
  Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
58
62
  {
63
+ Explanation *idf_expl1;
64
+ Explanation *idf_expl2;
65
+ Explanation *query_expl;
66
+ Explanation *qnorm_expl;
67
+ Explanation *field_expl;
68
+ Explanation *tf_expl;
69
+ Scorer *scorer;
70
+ uchar *field_norms;
71
+ float field_norm;
72
+ Explanation *field_norm_expl;
73
+
59
74
  char *query_str = self->query->to_s(self->query, "");
60
75
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
61
76
  int i, j;
@@ -67,7 +82,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
67
82
 
68
83
  for (i = 0; i < mphq->t_cnt; i++) {
69
84
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
70
- len += strlen(mphq->terms[i][j]->text) + 30;
85
+ len += (int)strlen(mphq->terms[i][j]->text) + 30;
71
86
  }
72
87
  }
73
88
  doc_freqs = ALLOC_N(char, len);
@@ -75,20 +90,20 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
75
90
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
76
91
  Term *term = mphq->terms[i][j];
77
92
  sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
78
- pos += strlen(doc_freqs + pos);
93
+ pos += (int)strlen(doc_freqs + pos);
79
94
  }
80
95
  }
81
96
  pos -= 2; // remove ", " from the end
82
97
  doc_freqs[pos] = 0;
83
98
 
84
- Explanation *idf_expl1 = expl_create(self->idf,
99
+ idf_expl1 = expl_create(self->idf,
85
100
  strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
86
- Explanation *idf_expl2 = expl_create(self->idf,
101
+ idf_expl2 = expl_create(self->idf,
87
102
  strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
88
103
  free(doc_freqs);
89
104
 
90
- // explain query weight
91
- Explanation *query_expl = expl_create(0.0,
105
+ /* explain query weight */
106
+ query_expl = expl_create(0.0,
92
107
  strfmt("query_weight(%s), product of:", query_str));
93
108
 
94
109
  if (self->query->boost != 1.0) {
@@ -96,36 +111,36 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
96
111
  }
97
112
  expl_add_detail(query_expl, idf_expl1);
98
113
 
99
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
114
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
100
115
  expl_add_detail(query_expl, qnorm_expl);
101
116
 
102
117
  query_expl->value = self->query->boost * self->idf * self->qnorm;
103
118
 
104
119
  expl_add_detail(expl, query_expl);
105
120
 
106
- // explain field weight
107
- Explanation *field_expl = expl_create(0.0,
121
+ /* explain field weight */
122
+ field_expl = expl_create(0.0,
108
123
  strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
109
124
  free(query_str);
110
125
 
111
- Scorer *scorer = self->scorer(self, ir);
112
- Explanation *tf_expl = scorer->explain(scorer, doc_num);
126
+ scorer = self->scorer(self, ir);
127
+ tf_expl = scorer->explain(scorer, doc_num);
113
128
  scorer->destroy(scorer);
114
129
  expl_add_detail(field_expl, tf_expl);
115
130
  expl_add_detail(field_expl, idf_expl2);
116
131
 
117
- uchar *field_norms = ir->get_norms(ir, mphq->field);
118
- float field_norm = (field_norms != NULL)
132
+ field_norms = ir->get_norms(ir, mphq->field);
133
+ field_norm = (field_norms != NULL)
119
134
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
120
- : 0.0;
121
- Explanation *field_norm_expl = expl_create(field_norm,
135
+ : (float)0.0;
136
+ field_norm_expl = expl_create(field_norm,
122
137
  strfmt("field_norm(field=%s, doc=%d)", mphq->field, doc_num));
123
138
 
124
139
  expl_add_detail(field_expl, field_norm_expl);
125
140
 
126
141
  field_expl->value = tf_expl->value * self->idf * field_norm;
127
142
 
128
- // combine them
143
+ /* combine them */
129
144
  if (query_expl->value == 1.0) {
130
145
  expl_destoy(expl);
131
146
  return field_expl;
@@ -136,29 +151,23 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
136
151
  }
137
152
  }
138
153
 
139
-
140
-
141
-
142
154
  Weight *mphw_create(Query *query, Searcher *searcher)
143
155
  {
156
+ Weight *self = w_create(query);
157
+
144
158
  MultiPhraseQuery *mphq = (MultiPhraseQuery *)query->data;
145
- Weight *self = ALLOC(Weight);
146
159
  int i, j;
147
- ZEROSET(self, Weight, 1);
148
- self->get_query = &w_get_query;
149
- self->get_value = &w_get_value;
150
- self->normalize = &w_normalize;
160
+
151
161
  self->scorer = &mphw_scorer;
152
162
  self->explain = &mphw_explain;
153
163
  self->to_s = &mphw_to_s;
154
- self->destroy = &free;
155
164
  self->sum_of_squared_weights = &w_sum_of_squared_weights;
156
165
 
157
166
  self->similarity = query->get_similarity(query, searcher);
158
167
  self->query = query;
159
168
  self->value = query->boost;
160
-
161
169
  self->idf = 0.0;
170
+
162
171
  for (i = 0; i < mphq->t_cnt; i++) {
163
172
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
164
173
  self->idf += sim_idf_term(self->similarity, mphq->terms[i][j], searcher);
@@ -209,10 +218,8 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
209
218
  mphq->t_cnt++;
210
219
  }
211
220
 
212
- void mphq_destroy(void *p)
221
+ void mphq_destroy(Query *self)
213
222
  {
214
- Query *self = (Query *)p;
215
-
216
223
  GET_MPHQ;
217
224
  int i, j;
218
225
  if (self->destroy_all) {
@@ -228,16 +235,16 @@ void mphq_destroy(void *p)
228
235
  free(mphq->pt_cnt);
229
236
  free(mphq);
230
237
 
231
- q_destroy(self);
238
+ q_destroy_i(self);
232
239
  }
233
240
 
234
- void mphq_extract_terms(Query *self, Array *terms)
241
+ void mphq_extract_terms(Query *self, HashSet *terms)
235
242
  {
236
243
  GET_MPHQ;
237
244
  int i, j;
238
245
  for (i = 0; i < mphq->t_cnt; i++) {
239
246
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
240
- ary_append(terms, mphq->terms[i][j]);
247
+ hs_add(terms, term_clone(mphq->terms[i][j]));
241
248
  }
242
249
  }
243
250
  }
@@ -248,10 +255,10 @@ char *mphq_to_s(Query *self, char *field)
248
255
  int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
249
256
  char *buffer;
250
257
  if (!mphq->t_cnt) return NULL;
251
- len = strlen(mphq->field) + 1;
258
+ len = (int)strlen(mphq->field) + 1;
252
259
  for (i = 0; i < mphq->t_cnt; i++) {
253
260
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
254
- len += strlen(mphq->terms[i][j]->text) + 1;
261
+ len += (int)strlen(mphq->terms[i][j]->text) + 1;
255
262
  }
256
263
  }
257
264
 
@@ -261,7 +268,7 @@ char *mphq_to_s(Query *self, char *field)
261
268
  buffer = ALLOC_N(char, len);
262
269
 
263
270
  if (strcmp(field, mphq->field) != 0) {
264
- len = strlen(mphq->field);
271
+ len = (int)strlen(mphq->field);
265
272
  memcpy(buffer, mphq->field, len);
266
273
  buffer[len] = ':';
267
274
  buf_index += len + 1;
@@ -278,7 +285,7 @@ char *mphq_to_s(Query *self, char *field)
278
285
 
279
286
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
280
287
  Term *term = mphq->terms[i][j];
281
- len = strlen(term->text);
288
+ len = (int)strlen(term->text);
282
289
  memcpy(buffer + buf_index, term->text, len);
283
290
  buf_index += len;
284
291
  buffer[buf_index++] = '|';
@@ -291,7 +298,7 @@ char *mphq_to_s(Query *self, char *field)
291
298
  buffer[buf_index] = 0;
292
299
  if (mphq->slop != 0) {
293
300
  sprintf(buffer + buf_index, "~%d", mphq->slop);
294
- buf_index += strlen(buffer + buf_index);
301
+ buf_index += (int)strlen(buffer + buf_index);
295
302
  }
296
303
  if (self->boost != 1.0) {
297
304
  buffer[buf_index] = '^';
@@ -311,16 +318,48 @@ Query *mphq_rewrite(Query *self, IndexReader *ir)
311
318
  bq_add_query(bq, tq_create(term_clone(terms[i])), BC_SHOULD);
312
319
  }
313
320
  bq->boost = self->boost;
314
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
315
- return self->rewritten = bq;
321
+ return bq;
316
322
  } else {
323
+ self->ref_cnt++;
317
324
  return self;
318
325
  }
319
326
  }
320
327
 
328
+ static uint mphq_hash(Query *self)
329
+ {
330
+ int i, j;
331
+ uint hash = 0;
332
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->data;
333
+ for (i = 0; i < mphq->t_cnt; i++) {
334
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
335
+ hash ^= (term_hash(mphq->terms[i][j]) ^ mphq->positions[i]);
336
+ }
337
+ hash <<= 1;
338
+ }
339
+ return (hash ^ mphq->slop);
340
+ }
341
+
342
+ static int mphq_eq(Query *self, Query *o)
343
+ {
344
+ int i, j;
345
+ MultiPhraseQuery *mphq1 = (MultiPhraseQuery *)self->data;
346
+ MultiPhraseQuery *mphq2 = (MultiPhraseQuery *)o->data;
347
+ if (mphq1->slop != mphq2->slop) return false;
348
+ for (i = 0; i < mphq1->t_cnt; i++) {
349
+ if ((mphq1->pt_cnt[i] != mphq2->pt_cnt[i]) ||
350
+ (mphq1->positions[i] != mphq2->positions[i])) return false;
351
+
352
+ for (j = 0; j < mphq1->pt_cnt[i]; j++) {
353
+ if (!term_eq(mphq1->terms[i][j], mphq2->terms[i][j])) return false;
354
+ }
355
+ }
356
+ return true;
357
+ }
358
+
321
359
  Query *mphq_create()
322
360
  {
323
361
  Query *self = q_create();
362
+
324
363
  MultiPhraseQuery *mphq = ALLOC(MultiPhraseQuery);
325
364
  ZEROSET(mphq, MultiPhraseQuery, 1);
326
365
  mphq->t_capa = PHQ_INIT_CAPA;
@@ -329,11 +368,13 @@ Query *mphq_create()
329
368
  mphq->pt_cnt = ALLOC_N(int, PHQ_INIT_CAPA);
330
369
  self->data = mphq;
331
370
 
332
- self->create_weight = &mphw_create;
371
+ self->type = MULTI_PHRASE_QUERY;
372
+ self->rewrite = &mphq_rewrite;
333
373
  self->extract_terms = &mphq_extract_terms;
334
374
  self->to_s = &mphq_to_s;
335
- self->destroy = &mphq_destroy;
336
- self->rewrite = &mphq_rewrite;
337
- self->type = PHRASE_QUERY;
375
+ self->hash = &mphq_hash;
376
+ self->eq = &mphq_eq;
377
+ self->destroy_i = &mphq_destroy;
378
+ self->create_weight_i = &mphw_create;
338
379
  return self;
339
380
  }