ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/lang.c CHANGED
@@ -29,3 +29,13 @@ void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...)
29
29
  sprintf(buf_ptr, "\n");
30
30
  rb_raise(etype, buf); /* conventional value for failed execution */
31
31
  }
32
+
33
+ #ifdef WIN32
34
+ void eprintf(VALUE etype, const char *fmt, ...)
35
+ {
36
+ va_list args;
37
+ va_start(args, fmt);
38
+ ft_raise("Windows", -1, etype, fmt, args);
39
+ va_end(args);
40
+ }
41
+ #endif
data/ext/lang.h CHANGED
@@ -7,7 +7,17 @@
7
7
  #define FERRET_EXT
8
8
 
9
9
  #define MAX_ERROR_LEN 2048
10
- #define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
10
+
11
+ typedef LONG_LONG llong;
12
+ typedef unsigned LONG_LONG ullong;
13
+
14
+ #ifdef WIN32
15
+ # undef close
16
+ # undef rename
17
+ extern void eprintf(VALUE etype, const char *fmt, ...);
18
+ #else
19
+ # define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
20
+ #endif
11
21
  extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
12
22
  extern void weprintf(const char *fmt, ...);
13
23
  extern char *progname(void);
data/ext/nix_io.c ADDED
@@ -0,0 +1,135 @@
1
+ #ifndef WIN32
2
+
3
+ #include "global.h"
4
+ #include "store.h"
5
+ #include <sys/dir.h>
6
+ #include <dirent.h>
7
+ #include <unistd.h>
8
+ #include <string.h>
9
+ #include <errno.h>
10
+ #include <fcntl.h>
11
+
12
/**
 * Create a filepath for a file in the store using the operating system's
 * default file separator ('/').
 *
 * No bounds checking is performed: the caller must supply a buffer large
 * enough for base, the separator, filename and the terminating NUL.
 *
 * @param buf      destination buffer that receives "base/filename"
 * @param base     directory part of the path
 * @param filename name of the file within base
 * @return buf
 */
char *join_path(char *buf, const char *base, const char *filename)
{
    const size_t base_len = strlen(base);
    const size_t name_len = strlen(filename);

    memcpy(buf, base, base_len);
    buf[base_len] = '/';
    /* name_len + 1 so that the terminating NUL is copied as well */
    memcpy(buf + base_len + 1, filename, name_len + 1);

    return buf;
}
21
+
22
+ bool exists(char *path)
23
+ {
24
+ int fd = open(path, 0);
25
+ if (fd < 0) {
26
+ if (errno != ENOENT) {
27
+ RAISE(IO_ERROR, strerror(errno));
28
+ }
29
+ return false;
30
+ }
31
+ close(fd);
32
+ return true;
33
+ }
34
+
35
+ int fcount(char *path)
36
+ {
37
+ int cnt = 0;
38
+ struct dirent *de;
39
+ DIR *d = opendir(path);
40
+
41
+ if (!d) RAISE(IO_ERROR, strerror(errno));
42
+
43
+ while ((de = readdir(d)) != NULL) {
44
+ if (de->d_name[0] != '.') {
45
+ cnt++;
46
+ }
47
+ }
48
+ closedir(d);
49
+
50
+ return cnt;
51
+ }
52
+
53
+ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
54
+ {
55
+ struct dirent *de;
56
+ DIR *d = opendir(path);
57
+
58
+ if (!d) RAISE(IO_ERROR, strerror(errno));
59
+
60
+ while ((de = readdir(d)) != NULL) {
61
+ if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
62
+ func(de->d_name, arg);
63
+ }
64
+ }
65
+ closedir(d);
66
+ }
67
+
68
+ /**
69
+ * Clear all the locks in the store.
70
+ *
71
+ * @param store the store to clear the locks from
72
+ * @throws IO_ERROR if there is an error opening the directory
73
+ */
74
+ void fs_clear_locks(Store *store)
75
+ {
76
+ struct dirent *de;
77
+ DIR *d = opendir(store->dir.path);
78
+
79
+ if (!d) RAISE(IO_ERROR, strerror(errno));
80
+
81
+ while ((de = readdir(d)) != NULL) {
82
+ if (file_is_lock(de->d_name)) {
83
+ char buf[MAX_FILE_PATH];
84
+ remove(join_path(buf, store->dir.path, de->d_name));
85
+ }
86
+ }
87
+ closedir(d);
88
+ }
89
+
90
+ /**
91
+ * Clear all files from the store except the lock files.
92
+ *
93
+ * @param store the store to clear all the files from
94
+ * @throws IO_ERROR if there is an error deleting the files
95
+ */
96
+ void fs_clear(Store *store)
97
+ {
98
+ struct dirent *de;
99
+ DIR *d = opendir(store->dir.path);
100
+
101
+ if (!d) RAISE(IO_ERROR, strerror(errno));
102
+
103
+ while ((de = readdir(d)) != NULL) {
104
+ if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
105
+ char buf[MAX_FILE_PATH];
106
+ remove(join_path(buf, store->dir.path, de->d_name));
107
+ }
108
+ }
109
+ closedir(d);
110
+ }
111
+
112
+ /**
113
+ * Clear all files from the store including the lock files.
114
+ *
115
+ * @param store the store to clear all the files from
116
+ * @throws IO_ERROR if there is an error deleting the files
117
+ */
118
+ void fs_clear_all(Store *store)
119
+ {
120
+ struct dirent *de;
121
+ DIR *d = opendir(store->dir.path);
122
+
123
+ if (!d) RAISE(IO_ERROR, strerror(errno));
124
+
125
+ while ((de = readdir(d)) != NULL) {
126
+ if (de->d_name[0] != '.') {
127
+ char buf[MAX_FILE_PATH];
128
+ remove(join_path(buf, store->dir.path, de->d_name));
129
+ }
130
+ }
131
+ closedir(d);
132
+ }
133
+
134
+ #endif
135
+
data/ext/priorityqueue.c CHANGED
@@ -11,20 +11,20 @@ PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2))
11
11
  return pq;
12
12
  }
13
13
 
14
- void pq_destroy(void *p)
14
+ void pq_destroy(PriorityQueue *pq)
15
15
  {
16
- PriorityQueue *pq = (PriorityQueue *)p;
17
16
  free(pq->heap);
18
- free(p);
17
+ free(pq);
19
18
  }
20
19
 
21
20
  void pq_up(PriorityQueue *pq)
22
21
  {
23
- int i,j;
24
- i = pq->count;
25
- j = i >> 1;
26
- void **heap = pq->heap;
27
- void *node = heap[i];
22
+ void **heap = pq->heap;
23
+ void *node;
24
+ int i = pq->count;
25
+ int j = i >> 1;
26
+
27
+ node = heap[i];
28
28
 
29
29
  while ((j > 0) && pq->lt(node, heap[j])) {
30
30
  heap[i] = heap[j];
@@ -117,7 +117,7 @@ int pq_insert(PriorityQueue *pq, void *elem)
117
117
 
118
118
  PriorityQueue2 *pq2_create(int max_size,
119
119
  bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
120
- void (*destroy)(void *p))
120
+ free_ft destroy)
121
121
  {
122
122
  PriorityQueue2 *pq = ALLOC(PriorityQueue2);
123
123
  pq->count = 0;
@@ -129,20 +129,20 @@ PriorityQueue2 *pq2_create(int max_size,
129
129
  return pq;
130
130
  }
131
131
 
132
- void pq2_destroy(void *p)
132
+ void pq2_destroy(PriorityQueue2 *pq)
133
133
  {
134
- PriorityQueue2 *pq = (PriorityQueue2 *)p;
135
134
  free(pq->heap);
136
- free(p);
135
+ free(pq);
137
136
  }
138
137
 
139
138
  void pq2_up(PriorityQueue2 *pq)
140
139
  {
141
- int i,j;
142
- i = pq->count;
143
- j = i >> 1;
144
140
  void **heap = pq->heap;
145
- void *node = heap[i];
141
+ void *node;
142
+ int i = pq->count;
143
+ int j = i >> 1;
144
+
145
+ node = heap[i];
146
146
 
147
147
  while ((j > 0) && pq->lt(pq, node, heap[j])) {
148
148
  heap[i] = heap[j];
data/ext/priorityqueue.h CHANGED
@@ -3,16 +3,19 @@
3
3
 
4
4
  #include "global.h"
5
5
 
6
+ typedef bool (*lt_ft)(void *p1, void *p2);
7
+
6
8
  typedef struct PriorityQueue {
7
9
  int count;
8
10
  int size;
9
11
  void **heap;
10
- bool (*lt)(void *p1, void *p2);
11
- void (*free_elem)(void *p1);
12
+ lt_ft lt;
13
+ //bool (*lt)(void *p1, void *p2);
14
+ free_ft free_elem;
12
15
  } PriorityQueue;
13
16
 
14
17
  PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2));
15
- void pq_destroy(void *p);
18
+ void pq_destroy(PriorityQueue *pq);
16
19
  void pq_push(PriorityQueue *pq, void *elem);
17
20
  void *pq_top(PriorityQueue *pq);
18
21
  void *pq_pop(PriorityQueue *pq);
@@ -28,13 +31,13 @@ typedef struct PriorityQueue2 {
28
31
  void *data;
29
32
  bool (*lt)(struct PriorityQueue2 *pq, void *p1, void *p2);
30
33
  void (*free_elem)(void *p);
31
- void (*destroy)(void *p);
34
+ free_ft destroy;
32
35
  } PriorityQueue2;
33
36
 
34
37
  PriorityQueue2 *pq2_create(int max_size,
35
38
  bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
36
- void (*destroy)(void *p));
37
- void pq2_destroy(void *p);
39
+ free_ft destroy);
40
+ void pq2_destroy(PriorityQueue2 *pq);
38
41
  void pq2_push(PriorityQueue2 *pq, void *elem);
39
42
  void *pq2_top(PriorityQueue2 *pq);
40
43
  void *pq2_pop(PriorityQueue2 *pq);
data/ext/q_boolean.c CHANGED
@@ -38,9 +38,9 @@ void bw_normalize(Weight *self, float normalization_factor)
38
38
  {
39
39
  BooleanWeight *bw = (BooleanWeight *)self->data;
40
40
  BooleanQuery *bq = (BooleanQuery *)self->query->data;
41
- normalization_factor *= self->value; // multiply by query boost
42
41
  Weight *weight;
43
42
  int i;
43
+ normalization_factor *= self->value; /* multiply by query boost */
44
44
 
45
45
  for (i = 0; i < bw->w_cnt; i++) {
46
46
  if (! bq->clauses[i]->is_prohibited) {
@@ -79,13 +79,18 @@ char *bw_to_s(Weight *self)
79
79
  return strfmt("BooleanWeight(%f)", self->value);
80
80
  }
81
81
 
82
- void bw_destroy(void *p)
82
+ void bw_destroy(Weight *self)
83
83
  {
84
- Weight *weight = (Weight *)p;
85
- BooleanWeight *bw = (BooleanWeight *)weight->data;
84
+ int i;
85
+ BooleanWeight *bw = (BooleanWeight *)self->data;
86
+
87
+ for (i = 0; i < bw->w_cnt; i++) {
88
+ bw->weights[i]->destroy(bw->weights[i]);
89
+ }
90
+
86
91
  free(bw->weights);
87
92
  free(bw);
88
- free(weight);
93
+ w_destroy(self);
89
94
  }
90
95
 
91
96
  Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
@@ -127,8 +132,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
127
132
  }
128
133
  sum_expl->value = sum;
129
134
 
130
- if (coord == 1) { // only one clause matched
131
- explanation = sum_expl; // eliminate wrapper
135
+ if (coord == 1) { /* only one clause matched */
136
+ explanation = sum_expl; /* eliminate wrapper */
132
137
  sum_expl->dcnt = 0;
133
138
  sum_expl = sum_expl->details[0];
134
139
  expl_destoy(explanation);
@@ -136,8 +141,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
136
141
 
137
142
  coord_factor = sim_coord(self->similarity, coord, max_coord);
138
143
 
139
- if (coord_factor == 1.0) { // coord is no-op
140
- return sum_expl; // eliminate wrapper
144
+ if (coord_factor == 1.0) { /* coord is no-op */
145
+ return sum_expl; /* eliminate wrapper */
141
146
  } else {
142
147
  explanation = expl_create(sum * coord_factor, estrdup("product of:"));
143
148
  expl_add_detail(explanation, sum_expl);
@@ -150,13 +155,17 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
150
155
  Weight *bw_create(Query *query, Searcher *searcher)
151
156
  {
152
157
  int i;
153
- BooleanQuery *bq = (BooleanQuery *)query->data;
158
+ Weight *self = w_create(query);
154
159
  BooleanWeight *bw = ALLOC(BooleanWeight);
155
- Weight *self = ALLOC(Weight);
156
- ZEROSET(self, Weight, 1);
160
+ BooleanQuery *bq = (BooleanQuery *)query->data;
161
+
162
+ bw->w_cnt = bq->clause_cnt;
163
+ bw->weights = ALLOC_N(Weight *, bw->w_cnt);
164
+ for (i = 0; i < bw->w_cnt; i++) {
165
+ bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
166
+ }
157
167
  self->data = bw;
158
- self->get_query = &w_get_query;
159
- self->get_value = &w_get_value;
168
+
160
169
  self->normalize = &bw_normalize;
161
170
  self->scorer = &bw_scorer;
162
171
  self->explain = &bw_explain;
@@ -165,15 +174,8 @@ Weight *bw_create(Query *query, Searcher *searcher)
165
174
  self->sum_of_squared_weights = &bw_sum_of_squared_weights;
166
175
 
167
176
  self->similarity = query->get_similarity(query, searcher);
168
- self->query = query;
169
177
  self->value = query->boost;
170
178
 
171
- bw->w_cnt = bq->clause_cnt;
172
- bw->weights = ALLOC_N(Weight *, bw->w_cnt);
173
- for (i = 0; i < bw->w_cnt; i++) {
174
- bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
175
- }
176
-
177
179
  return self;
178
180
  }
179
181
 
@@ -204,15 +206,28 @@ void bc_set_occur(BooleanClause *self, unsigned int occur)
204
206
  }
205
207
  }
206
208
 
207
- void bc_destroy(BooleanClause *self)
209
+ void bc_deref(BooleanClause *self)
210
+ {
211
+ if (--self->ref_cnt <= 0) {
212
+ q_deref(self->query);
213
+ free(self);
214
+ }
215
+ }
216
+
217
+ uint bc_hash(BooleanClause *self)
218
+ {
219
+ return ((q_hash(self->query) << 2) | self->occur);
220
+ }
221
+
222
+ int bc_eq(BooleanClause *self, BooleanClause *o)
208
223
  {
209
- self->query->destroy(self->query);
210
- free(self);
224
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
211
225
  }
212
226
 
213
227
  BooleanClause *bc_create(Query *query, unsigned int occur)
214
228
  {
215
229
  BooleanClause *self = ALLOC(BooleanClause);
230
+ self->ref_cnt = 1;
216
231
  self->query = query;
217
232
  bc_set_occur(self, occur);
218
233
  return self;
@@ -228,38 +243,44 @@ Query *bq_rewrite(Query *self, IndexReader *ir)
228
243
  {
229
244
  BooleanQuery *bq = (BooleanQuery *)self->data;
230
245
  BooleanClause *clause;
231
- Query *query;
246
+ Query *q, *rq;
232
247
  int i;
233
248
 
234
249
  if (bq->clause_cnt == 1) { // optimize 1-clause queries
235
250
  clause = bq->clauses[0];
236
251
  if (! clause->is_prohibited) { // just return clause
237
- query = clause->query->rewrite(clause->query, ir); // rewrite first
252
+ q = clause->query->rewrite(clause->query, ir); // rewrite first
238
253
 
239
254
  if (self->boost != 1.0) {// incorporate boost
240
- // original_boost is initialized to 0.0. If it has been set to
241
- // something else it means this query has already been boosted before
242
- // so boost from the original value
243
- if ((query == clause->query) && query->original_boost) { // rewrite was no-op
244
- query->boost = query->original_boost * self->boost;
255
+ /* original_boost is initialized to 0.0. If it has been set to
256
+ * something else it means this query has already been boosted before
257
+ * so boost from the original value */
258
+ if ((q == clause->query) && bq->original_boost) {
259
+ /* rewrite was no-op */
260
+ q->boost = bq->original_boost * self->boost;
245
261
  } else {
246
- query->original_boost = query->boost; // save original boost
247
- query->boost *= self->boost;
262
+ bq->original_boost = q->boost; /* save original boost */
263
+ q->boost *= self->boost;
248
264
  }
249
265
  }
250
266
 
251
- return query;
267
+ return q;
252
268
  }
253
269
  }
254
270
 
271
+ /* replace each clause's query with its rewritten query */
255
272
  for (i = 0; i < bq->clause_cnt; i++) {
256
273
  clause = bq->clauses[i];
257
- clause->rewritten = clause->query->rewrite(clause->query, ir);
274
+ rq = clause->query->rewrite(clause->query, ir);
275
+ q_deref(clause->query);
276
+ clause->query = rq;
258
277
  }
259
- return self; // no clauses rewritten
278
+
279
+ self->ref_cnt++;
280
+ return self;
260
281
  }
261
282
 
262
- void bq_extract_terms(Query *self, Array *terms)
283
+ void bq_extract_terms(Query *self, HashSet *terms)
263
284
  {
264
285
  BooleanQuery *bq = (BooleanQuery *)self->data;
265
286
  BooleanClause *clause;
@@ -272,6 +293,7 @@ void bq_extract_terms(Query *self, Array *terms)
272
293
 
273
294
  char *bq_to_s(Query *self, char *field)
274
295
  {
296
+ int i;
275
297
  BooleanQuery *bq = (BooleanQuery *)self->data;
276
298
  BooleanClause *clause;
277
299
  Query *sub_query;
@@ -288,11 +310,10 @@ char *bq_to_s(Query *self, char *field)
288
310
  bp++;
289
311
  }
290
312
 
291
- int i;
292
313
  for (i = 0; i < bq->clause_cnt; i++) {
293
314
  clause = bq->clauses[i];
294
315
  clause_str = clause->query->to_s(clause->query, field);
295
- clause_len = strlen(clause_str);
316
+ clause_len = (int)strlen(clause_str);
296
317
  needed = clause_len + 5;
297
318
  while ((size - bp) < needed) {
298
319
  size *= 2;
@@ -323,7 +344,7 @@ char *bq_to_s(Query *self, char *field)
323
344
 
324
345
  if (self->boost != 1.0) {
325
346
  char *boost_str = strfmt(")^%f", self->boost);
326
- int boost_len = strlen(boost_str);
347
+ int boost_len = (int)strlen(boost_str);
327
348
  REALLOC_N(buffer, char, bp + boost_len + 1);
328
349
  memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
329
350
  bp += boost_len;
@@ -333,22 +354,19 @@ char *bq_to_s(Query *self, char *field)
333
354
  return buffer;
334
355
  }
335
356
 
336
- void bq_destroy(void *p)
357
+ static void bq_destroy(Query *self)
337
358
  {
338
- Query *self = (Query *)p;
339
359
  BooleanQuery *bq = (BooleanQuery *)self->data;
340
360
  int i;
341
- if (self->destroy_all) {
342
- for (i = 0; i < bq->clause_cnt; i++) {
343
- bc_destroy(bq->clauses[i]);
344
- }
361
+ for (i = 0; i < bq->clause_cnt; i++) {
362
+ bc_deref(bq->clauses[i]);
345
363
  }
346
364
  free(bq->clauses);
347
365
  if (bq->similarity) {
348
366
  bq->similarity->destroy(bq->similarity);
349
367
  }
350
368
  free(bq);
351
- q_destroy(self);
369
+ q_destroy_i(self);
352
370
  }
353
371
 
354
372
  float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
@@ -360,27 +378,50 @@ Similarity *bq_get_similarity(Query *self, Searcher *searcher)
360
378
  {
361
379
  BooleanQuery *bq = (BooleanQuery *)self->data;
362
380
  if (!bq->similarity) {
363
- Similarity *sim = q_get_similarity(self, searcher);
381
+ Similarity *sim = q_get_similarity_i(self, searcher);
364
382
  bq->similarity = ALLOC(Similarity);
365
383
  memcpy(bq->similarity, sim, sizeof(Similarity));
366
384
  bq->similarity->coord = &bq_coord_disabled;
367
- bq->similarity->destroy = &free;
385
+ bq->similarity->destroy = (void (*)(Similarity *))&free;
368
386
  }
369
387
 
370
388
  return bq->similarity;
371
389
  }
372
390
 
391
+ static uint bq_hash(Query *self)
392
+ {
393
+ int i;
394
+ uint hash = 0;
395
+ BooleanQuery *bq = (BooleanQuery *)self->data;
396
+ for (i = 0; i < bq->clause_cnt; i++) {
397
+ hash ^= bc_hash(bq->clauses[i]);
398
+ }
399
+ return (hash << 1) | bq->coord_disabled;
400
+ }
401
+
402
+ static int bq_eq(Query *self, Query *o)
403
+ {
404
+ int i;
405
+ BooleanQuery *bq1 = (BooleanQuery *)self->data;
406
+ BooleanQuery *bq2 = (BooleanQuery *)o->data;
407
+ if ((bq1->coord_disabled != bq2->coord_disabled) ||
408
+ (bq1->max_clause_cnt != bq1->max_clause_cnt) ||
409
+ (bq1->clause_cnt != bq2->clause_cnt)) {
410
+ return false;
411
+ }
412
+
413
+ for (i = 0; i < bq1->clause_cnt; i++) {
414
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
415
+ return false;
416
+ }
417
+ }
418
+ return true;
419
+ }
420
+
373
421
  Query *bq_create(bool coord_disabled)
374
422
  {
375
423
  Query *self = q_create();
376
424
  BooleanQuery *bq = ALLOC(BooleanQuery);
377
- self->type = BOOLEAN_QUERY;
378
- self->create_weight = &bw_create;
379
- self->rewrite = &bq_rewrite;
380
- self->extract_terms = &bq_extract_terms;
381
- self->to_s = &bq_to_s;
382
- self->destroy = &bq_destroy;
383
- self->data = bq;
384
425
  bq->coord_disabled = coord_disabled;
385
426
  if (coord_disabled) {
386
427
  self->get_similarity = &bq_get_similarity;
@@ -390,13 +431,24 @@ Query *bq_create(bool coord_disabled)
390
431
  bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
391
432
  bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
392
433
  bq->similarity = NULL;
434
+ bq->original_boost = 0.0;
435
+ self->data = bq;
393
436
 
437
+ self->type = BOOLEAN_QUERY;
438
+ self->rewrite = &bq_rewrite;
439
+ self->extract_terms = &bq_extract_terms;
440
+ self->to_s = &bq_to_s;
441
+ self->hash = &bq_hash;
442
+ self->eq = &bq_eq;
443
+ self->destroy_i = &bq_destroy;
444
+ self->create_weight_i = &bw_create;
394
445
  return self;
395
446
  }
396
447
 
397
448
  BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
398
449
  {
399
450
  BooleanQuery *bq = (BooleanQuery *)self->data;
451
+ if (!self->destroy_all) ref(bc);
400
452
  if (bq->clause_cnt >= bq->clause_capa) {
401
453
  bq->clause_capa *= 2;
402
454
  REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
@@ -412,7 +464,12 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
412
464
  BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
413
465
  {
414
466
  BooleanClause *bc = bc_create(sub_query, occur);
415
- return bq_add_clause(self, bc);
467
+ bq_add_clause(self, bc);
468
+ if (!self->destroy_all) {
469
+ ref(sub_query);
470
+ bc_deref(bc); /* bc would have been referenced unnecessarily */
471
+ }
472
+ return bc;
416
473
  }
417
474
 
418
475
  /***************************************************************************
@@ -572,16 +629,15 @@ Explanation *dssc_explain(Scorer *self, int doc_num)
572
629
  return e;
573
630
  }
574
631
 
575
- void dssc_destroy(void *p)
632
+ void dssc_destroy(Scorer *self)
576
633
  {
577
- Scorer *self = (Scorer *)p;
578
634
  DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
579
635
  int i;
580
636
  for (i = 0; i < dssc->ss_cnt; i++) {
581
637
  dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
582
638
  }
583
639
  if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
584
- scorer_destroy(self);
640
+ scorer_destroy_i(self);
585
641
  }
586
642
 
587
643
  Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
@@ -655,7 +711,8 @@ void csc_init(Scorer *self, bool init_scorers)
655
711
  ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
656
712
  Scorer *sub_scorer;
657
713
  int i;
658
- // compute coord factor
714
+
715
+ /* compute coord factor */
659
716
  csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
660
717
 
661
718
  csc->more = (csc->ss_cnt > 0);
@@ -746,16 +803,15 @@ bool csc_skip_to(Scorer *self, int doc_num)
746
803
  return csc_do_next(self);
747
804
  }
748
805
 
749
- void csc_destroy(void *p)
806
+ void csc_destroy(Scorer *self)
750
807
  {
751
- Scorer *self = (Scorer *)p;
752
808
  ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
753
809
  int i;
754
810
  for (i = 0; i < csc->ss_cnt; i++) {
755
811
  csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
756
812
  }
757
813
  free(csc->sub_scorers);
758
- scorer_destroy(self);
814
+ scorer_destroy_i(self);
759
815
  }
760
816
 
761
817
  Scorer *conjunction_scorer_create(Similarity *similarity)
@@ -842,12 +898,11 @@ Explanation *smsc_explain(Scorer *self, int doc_num)
842
898
  return scorer->explain(scorer, doc_num);
843
899
  }
844
900
 
845
- void smsc_destroy(void *p)
901
+ void smsc_destroy(Scorer *self)
846
902
  {
847
- Scorer *self = (Scorer *)p;
848
903
  Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
849
904
  scorer->destroy(scorer);
850
- scorer_destroy(self);
905
+ scorer_destroy_i(self);
851
906
  }
852
907
 
853
908
  Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
@@ -929,13 +984,12 @@ Explanation *rossc_explain(Scorer *self, int doc_num)
929
984
  return e;
930
985
  }
931
986
 
932
- void rossc_destroy(void *p)
987
+ void rossc_destroy(Scorer *self)
933
988
  {
934
- Scorer *self = (Scorer *)p;
935
989
  ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
936
990
  if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
937
991
  if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
938
- scorer_destroy(self);
992
+ scorer_destroy_i(self);
939
993
  }
940
994
 
941
995
 
@@ -1070,13 +1124,12 @@ Explanation *rxsc_explain(Scorer *self, int doc_num)
1070
1124
  return e;
1071
1125
  }
1072
1126
 
1073
- void rxsc_destroy(void *p)
1127
+ void rxsc_destroy(Scorer *self)
1074
1128
  {
1075
- Scorer *self = (Scorer *)p;
1076
1129
  ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1077
1130
  if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
1078
1131
  if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
1079
- scorer_destroy(self);
1132
+ scorer_destroy_i(self);
1080
1133
  }
1081
1134
 
1082
1135
  Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
@@ -1288,9 +1341,8 @@ bool bsc_skip_to(Scorer *self, int doc_num)
1288
1341
  }
1289
1342
  }
1290
1343
 
1291
- void bsc_destroy(void *p)
1344
+ void bsc_destroy(Scorer *self)
1292
1345
  {
1293
- Scorer *self = (Scorer *)p;
1294
1346
  BooleanScorer *bsc = (BooleanScorer *)self->data;
1295
1347
  Coordinator *coord = bsc->coordinator;
1296
1348
 
@@ -1316,7 +1368,7 @@ void bsc_destroy(void *p)
1316
1368
  free(bsc->required_scorers);
1317
1369
  free(bsc->optional_scorers);
1318
1370
  free(bsc->prohibited_scorers);
1319
- scorer_destroy(self);
1371
+ scorer_destroy_i(self);
1320
1372
  }
1321
1373
 
1322
1374
  Explanation *bsc_explain(Scorer *self, int doc_num)