ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/lang.c CHANGED
@@ -29,3 +29,13 @@ void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...)
29
29
  sprintf(buf_ptr, "\n");
30
30
  rb_raise(etype, buf); /* conventional value for failed execution */
31
31
  }
32
+
33
+ #ifdef WIN32
34
+ void eprintf(VALUE etype, const char *fmt, ...)
35
+ {
36
+ va_list args;
37
+ va_start(args, fmt);
38
+ ft_raise("Windows", -1, etype, fmt, args);
39
+ va_end(args);
40
+ }
41
+ #endif
data/ext/lang.h CHANGED
@@ -7,7 +7,17 @@
7
7
  #define FERRET_EXT
8
8
 
9
9
  #define MAX_ERROR_LEN 2048
10
- #define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
10
+
11
+ typedef LONG_LONG llong;
12
+ typedef unsigned LONG_LONG ullong;
13
+
14
+ #ifdef WIN32
15
+ # undef close
16
+ # undef rename
17
+ extern void eprintf(VALUE etype, const char *fmt, ...);
18
+ #else
19
+ # define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
20
+ #endif
11
21
  extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
12
22
  extern void weprintf(const char *fmt, ...);
13
23
  extern char *progname(void);
data/ext/nix_io.c ADDED
@@ -0,0 +1,135 @@
1
+ #ifndef WIN32
2
+
3
+ #include "global.h"
4
+ #include "store.h"
5
+ #include <sys/dir.h>
6
+ #include <dirent.h>
7
+ #include <unistd.h>
8
+ #include <string.h>
9
+ #include <errno.h>
10
+ #include <fcntl.h>
11
+
12
/**
 * Build the path of a file inside a store directory by writing
 * "<base>/<filename>" into +buf+.
 *
 * No bounds checking is done: the caller must supply a buffer large enough
 * for both strings, the '/' separator and the terminating NUL.
 *
 * @param buf      destination buffer
 * @param base     directory part of the path
 * @param filename name of the file within +base+
 * @return +buf+
 */
char *join_path(char *buf, const char *base, const char *filename)
{
    const size_t base_len = strlen(base);
    const size_t name_len = strlen(filename);
    char *out = buf;

    memcpy(out, base, base_len);
    out += base_len;
    *out++ = '/';
    memcpy(out, filename, name_len + 1); /* +1 also copies the NUL */

    return buf;
}
21
+
22
+ bool exists(char *path)
23
+ {
24
+ int fd = open(path, 0);
25
+ if (fd < 0) {
26
+ if (errno != ENOENT) {
27
+ RAISE(IO_ERROR, strerror(errno));
28
+ }
29
+ return false;
30
+ }
31
+ close(fd);
32
+ return true;
33
+ }
34
+
35
+ int fcount(char *path)
36
+ {
37
+ int cnt = 0;
38
+ struct dirent *de;
39
+ DIR *d = opendir(path);
40
+
41
+ if (!d) RAISE(IO_ERROR, strerror(errno));
42
+
43
+ while ((de = readdir(d)) != NULL) {
44
+ if (de->d_name[0] != '.') {
45
+ cnt++;
46
+ }
47
+ }
48
+ closedir(d);
49
+
50
+ return cnt;
51
+ }
52
+
53
+ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
54
+ {
55
+ struct dirent *de;
56
+ DIR *d = opendir(path);
57
+
58
+ if (!d) RAISE(IO_ERROR, strerror(errno));
59
+
60
+ while ((de = readdir(d)) != NULL) {
61
+ if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
62
+ func(de->d_name, arg);
63
+ }
64
+ }
65
+ closedir(d);
66
+ }
67
+
68
+ /**
69
+ * Clear all the locks in the store.
70
+ *
71
+ * @param store the store to clear the locks from
72
+ * @throws IO_ERROR if there is an error opening the directory
73
+ */
74
+ void fs_clear_locks(Store *store)
75
+ {
76
+ struct dirent *de;
77
+ DIR *d = opendir(store->dir.path);
78
+
79
+ if (!d) RAISE(IO_ERROR, strerror(errno));
80
+
81
+ while ((de = readdir(d)) != NULL) {
82
+ if (file_is_lock(de->d_name)) {
83
+ char buf[MAX_FILE_PATH];
84
+ remove(join_path(buf, store->dir.path, de->d_name));
85
+ }
86
+ }
87
+ closedir(d);
88
+ }
89
+
90
+ /**
91
+ * Clear all files from the store except the lock files.
92
+ *
93
+ * @param store the store to clear all the files from
94
+ * @throws IO_ERROR if there is an error deleting the files
95
+ */
96
+ void fs_clear(Store *store)
97
+ {
98
+ struct dirent *de;
99
+ DIR *d = opendir(store->dir.path);
100
+
101
+ if (!d) RAISE(IO_ERROR, strerror(errno));
102
+
103
+ while ((de = readdir(d)) != NULL) {
104
+ if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
105
+ char buf[MAX_FILE_PATH];
106
+ remove(join_path(buf, store->dir.path, de->d_name));
107
+ }
108
+ }
109
+ closedir(d);
110
+ }
111
+
112
+ /**
113
+ * Clear all files from the store including the lock files.
114
+ *
115
+ * @param store the store to clear all the files from
116
+ * @throws IO_ERROR if there is an error deleting the files
117
+ */
118
+ void fs_clear_all(Store *store)
119
+ {
120
+ struct dirent *de;
121
+ DIR *d = opendir(store->dir.path);
122
+
123
+ if (!d) RAISE(IO_ERROR, strerror(errno));
124
+
125
+ while ((de = readdir(d)) != NULL) {
126
+ if (de->d_name[0] != '.') {
127
+ char buf[MAX_FILE_PATH];
128
+ remove(join_path(buf, store->dir.path, de->d_name));
129
+ }
130
+ }
131
+ closedir(d);
132
+ }
133
+
134
+ #endif
135
+
data/ext/priorityqueue.c CHANGED
@@ -11,20 +11,20 @@ PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2))
11
11
  return pq;
12
12
  }
13
13
 
14
- void pq_destroy(void *p)
14
+ void pq_destroy(PriorityQueue *pq)
15
15
  {
16
- PriorityQueue *pq = (PriorityQueue *)p;
17
16
  free(pq->heap);
18
- free(p);
17
+ free(pq);
19
18
  }
20
19
 
21
20
  void pq_up(PriorityQueue *pq)
22
21
  {
23
- int i,j;
24
- i = pq->count;
25
- j = i >> 1;
26
- void **heap = pq->heap;
27
- void *node = heap[i];
22
+ void **heap = pq->heap;
23
+ void *node;
24
+ int i = pq->count;
25
+ int j = i >> 1;
26
+
27
+ node = heap[i];
28
28
 
29
29
  while ((j > 0) && pq->lt(node, heap[j])) {
30
30
  heap[i] = heap[j];
@@ -117,7 +117,7 @@ int pq_insert(PriorityQueue *pq, void *elem)
117
117
 
118
118
  PriorityQueue2 *pq2_create(int max_size,
119
119
  bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
120
- void (*destroy)(void *p))
120
+ free_ft destroy)
121
121
  {
122
122
  PriorityQueue2 *pq = ALLOC(PriorityQueue2);
123
123
  pq->count = 0;
@@ -129,20 +129,20 @@ PriorityQueue2 *pq2_create(int max_size,
129
129
  return pq;
130
130
  }
131
131
 
132
- void pq2_destroy(void *p)
132
+ void pq2_destroy(PriorityQueue2 *pq)
133
133
  {
134
- PriorityQueue2 *pq = (PriorityQueue2 *)p;
135
134
  free(pq->heap);
136
- free(p);
135
+ free(pq);
137
136
  }
138
137
 
139
138
  void pq2_up(PriorityQueue2 *pq)
140
139
  {
141
- int i,j;
142
- i = pq->count;
143
- j = i >> 1;
144
140
  void **heap = pq->heap;
145
- void *node = heap[i];
141
+ void *node;
142
+ int i = pq->count;
143
+ int j = i >> 1;
144
+
145
+ node = heap[i];
146
146
 
147
147
  while ((j > 0) && pq->lt(pq, node, heap[j])) {
148
148
  heap[i] = heap[j];
data/ext/priorityqueue.h CHANGED
@@ -3,16 +3,19 @@
3
3
 
4
4
  #include "global.h"
5
5
 
6
+ typedef bool (*lt_ft)(void *p1, void *p2);
7
+
6
8
  typedef struct PriorityQueue {
7
9
  int count;
8
10
  int size;
9
11
  void **heap;
10
- bool (*lt)(void *p1, void *p2);
11
- void (*free_elem)(void *p1);
12
+ lt_ft lt;
13
+ //bool (*lt)(void *p1, void *p2);
14
+ free_ft free_elem;
12
15
  } PriorityQueue;
13
16
 
14
17
  PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2));
15
- void pq_destroy(void *p);
18
+ void pq_destroy(PriorityQueue *pq);
16
19
  void pq_push(PriorityQueue *pq, void *elem);
17
20
  void *pq_top(PriorityQueue *pq);
18
21
  void *pq_pop(PriorityQueue *pq);
@@ -28,13 +31,13 @@ typedef struct PriorityQueue2 {
28
31
  void *data;
29
32
  bool (*lt)(struct PriorityQueue2 *pq, void *p1, void *p2);
30
33
  void (*free_elem)(void *p);
31
- void (*destroy)(void *p);
34
+ free_ft destroy;
32
35
  } PriorityQueue2;
33
36
 
34
37
  PriorityQueue2 *pq2_create(int max_size,
35
38
  bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
36
- void (*destroy)(void *p));
37
- void pq2_destroy(void *p);
39
+ free_ft destroy);
40
+ void pq2_destroy(PriorityQueue2 *pq);
38
41
  void pq2_push(PriorityQueue2 *pq, void *elem);
39
42
  void *pq2_top(PriorityQueue2 *pq);
40
43
  void *pq2_pop(PriorityQueue2 *pq);
data/ext/q_boolean.c CHANGED
@@ -38,9 +38,9 @@ void bw_normalize(Weight *self, float normalization_factor)
38
38
  {
39
39
  BooleanWeight *bw = (BooleanWeight *)self->data;
40
40
  BooleanQuery *bq = (BooleanQuery *)self->query->data;
41
- normalization_factor *= self->value; // multiply by query boost
42
41
  Weight *weight;
43
42
  int i;
43
+ normalization_factor *= self->value; /* multiply by query boost */
44
44
 
45
45
  for (i = 0; i < bw->w_cnt; i++) {
46
46
  if (! bq->clauses[i]->is_prohibited) {
@@ -79,13 +79,18 @@ char *bw_to_s(Weight *self)
79
79
  return strfmt("BooleanWeight(%f)", self->value);
80
80
  }
81
81
 
82
- void bw_destroy(void *p)
82
+ void bw_destroy(Weight *self)
83
83
  {
84
- Weight *weight = (Weight *)p;
85
- BooleanWeight *bw = (BooleanWeight *)weight->data;
84
+ int i;
85
+ BooleanWeight *bw = (BooleanWeight *)self->data;
86
+
87
+ for (i = 0; i < bw->w_cnt; i++) {
88
+ bw->weights[i]->destroy(bw->weights[i]);
89
+ }
90
+
86
91
  free(bw->weights);
87
92
  free(bw);
88
- free(weight);
93
+ w_destroy(self);
89
94
  }
90
95
 
91
96
  Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
@@ -127,8 +132,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
127
132
  }
128
133
  sum_expl->value = sum;
129
134
 
130
- if (coord == 1) { // only one clause matched
131
- explanation = sum_expl; // eliminate wrapper
135
+ if (coord == 1) { /* only one clause matched */
136
+ explanation = sum_expl; /* eliminate wrapper */
132
137
  sum_expl->dcnt = 0;
133
138
  sum_expl = sum_expl->details[0];
134
139
  expl_destoy(explanation);
@@ -136,8 +141,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
136
141
 
137
142
  coord_factor = sim_coord(self->similarity, coord, max_coord);
138
143
 
139
- if (coord_factor == 1.0) { // coord is no-op
140
- return sum_expl; // eliminate wrapper
144
+ if (coord_factor == 1.0) { /* coord is no-op */
145
+ return sum_expl; /* eliminate wrapper */
141
146
  } else {
142
147
  explanation = expl_create(sum * coord_factor, estrdup("product of:"));
143
148
  expl_add_detail(explanation, sum_expl);
@@ -150,13 +155,17 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
150
155
  Weight *bw_create(Query *query, Searcher *searcher)
151
156
  {
152
157
  int i;
153
- BooleanQuery *bq = (BooleanQuery *)query->data;
158
+ Weight *self = w_create(query);
154
159
  BooleanWeight *bw = ALLOC(BooleanWeight);
155
- Weight *self = ALLOC(Weight);
156
- ZEROSET(self, Weight, 1);
160
+ BooleanQuery *bq = (BooleanQuery *)query->data;
161
+
162
+ bw->w_cnt = bq->clause_cnt;
163
+ bw->weights = ALLOC_N(Weight *, bw->w_cnt);
164
+ for (i = 0; i < bw->w_cnt; i++) {
165
+ bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
166
+ }
157
167
  self->data = bw;
158
- self->get_query = &w_get_query;
159
- self->get_value = &w_get_value;
168
+
160
169
  self->normalize = &bw_normalize;
161
170
  self->scorer = &bw_scorer;
162
171
  self->explain = &bw_explain;
@@ -165,15 +174,8 @@ Weight *bw_create(Query *query, Searcher *searcher)
165
174
  self->sum_of_squared_weights = &bw_sum_of_squared_weights;
166
175
 
167
176
  self->similarity = query->get_similarity(query, searcher);
168
- self->query = query;
169
177
  self->value = query->boost;
170
178
 
171
- bw->w_cnt = bq->clause_cnt;
172
- bw->weights = ALLOC_N(Weight *, bw->w_cnt);
173
- for (i = 0; i < bw->w_cnt; i++) {
174
- bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
175
- }
176
-
177
179
  return self;
178
180
  }
179
181
 
@@ -204,15 +206,28 @@ void bc_set_occur(BooleanClause *self, unsigned int occur)
204
206
  }
205
207
  }
206
208
 
207
- void bc_destroy(BooleanClause *self)
209
+ void bc_deref(BooleanClause *self)
210
+ {
211
+ if (--self->ref_cnt <= 0) {
212
+ q_deref(self->query);
213
+ free(self);
214
+ }
215
+ }
216
+
217
+ uint bc_hash(BooleanClause *self)
218
+ {
219
+ return ((q_hash(self->query) << 2) | self->occur);
220
+ }
221
+
222
+ int bc_eq(BooleanClause *self, BooleanClause *o)
208
223
  {
209
- self->query->destroy(self->query);
210
- free(self);
224
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
211
225
  }
212
226
 
213
227
  BooleanClause *bc_create(Query *query, unsigned int occur)
214
228
  {
215
229
  BooleanClause *self = ALLOC(BooleanClause);
230
+ self->ref_cnt = 1;
216
231
  self->query = query;
217
232
  bc_set_occur(self, occur);
218
233
  return self;
@@ -228,38 +243,44 @@ Query *bq_rewrite(Query *self, IndexReader *ir)
228
243
  {
229
244
  BooleanQuery *bq = (BooleanQuery *)self->data;
230
245
  BooleanClause *clause;
231
- Query *query;
246
+ Query *q, *rq;
232
247
  int i;
233
248
 
234
249
  if (bq->clause_cnt == 1) { // optimize 1-clause queries
235
250
  clause = bq->clauses[0];
236
251
  if (! clause->is_prohibited) { // just return clause
237
- query = clause->query->rewrite(clause->query, ir); // rewrite first
252
+ q = clause->query->rewrite(clause->query, ir); // rewrite first
238
253
 
239
254
  if (self->boost != 1.0) {// incorporate boost
240
- // original_boost is initialized to 0.0. If it has been set to
241
- // something else it means this query has already been boosted before
242
- // so boost from the original value
243
- if ((query == clause->query) && query->original_boost) { // rewrite was no-op
244
- query->boost = query->original_boost * self->boost;
255
+ /* original_boost is initialized to 0.0. If it has been set to
256
+ * something else it means this query has already been boosted before
257
+ * so boost from the original value */
258
+ if ((q == clause->query) && bq->original_boost) {
259
+ /* rewrite was no-op */
260
+ q->boost = bq->original_boost * self->boost;
245
261
  } else {
246
- query->original_boost = query->boost; // save original boost
247
- query->boost *= self->boost;
262
+ bq->original_boost = q->boost; /* save original boost */
263
+ q->boost *= self->boost;
248
264
  }
249
265
  }
250
266
 
251
- return query;
267
+ return q;
252
268
  }
253
269
  }
254
270
 
271
+ /* replace each clause's query with its rewritten query */
255
272
  for (i = 0; i < bq->clause_cnt; i++) {
256
273
  clause = bq->clauses[i];
257
- clause->rewritten = clause->query->rewrite(clause->query, ir);
274
+ rq = clause->query->rewrite(clause->query, ir);
275
+ q_deref(clause->query);
276
+ clause->query = rq;
258
277
  }
259
- return self; // no clauses rewritten
278
+
279
+ self->ref_cnt++;
280
+ return self;
260
281
  }
261
282
 
262
- void bq_extract_terms(Query *self, Array *terms)
283
+ void bq_extract_terms(Query *self, HashSet *terms)
263
284
  {
264
285
  BooleanQuery *bq = (BooleanQuery *)self->data;
265
286
  BooleanClause *clause;
@@ -272,6 +293,7 @@ void bq_extract_terms(Query *self, Array *terms)
272
293
 
273
294
  char *bq_to_s(Query *self, char *field)
274
295
  {
296
+ int i;
275
297
  BooleanQuery *bq = (BooleanQuery *)self->data;
276
298
  BooleanClause *clause;
277
299
  Query *sub_query;
@@ -288,11 +310,10 @@ char *bq_to_s(Query *self, char *field)
288
310
  bp++;
289
311
  }
290
312
 
291
- int i;
292
313
  for (i = 0; i < bq->clause_cnt; i++) {
293
314
  clause = bq->clauses[i];
294
315
  clause_str = clause->query->to_s(clause->query, field);
295
- clause_len = strlen(clause_str);
316
+ clause_len = (int)strlen(clause_str);
296
317
  needed = clause_len + 5;
297
318
  while ((size - bp) < needed) {
298
319
  size *= 2;
@@ -323,7 +344,7 @@ char *bq_to_s(Query *self, char *field)
323
344
 
324
345
  if (self->boost != 1.0) {
325
346
  char *boost_str = strfmt(")^%f", self->boost);
326
- int boost_len = strlen(boost_str);
347
+ int boost_len = (int)strlen(boost_str);
327
348
  REALLOC_N(buffer, char, bp + boost_len + 1);
328
349
  memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
329
350
  bp += boost_len;
@@ -333,22 +354,19 @@ char *bq_to_s(Query *self, char *field)
333
354
  return buffer;
334
355
  }
335
356
 
336
- void bq_destroy(void *p)
357
+ static void bq_destroy(Query *self)
337
358
  {
338
- Query *self = (Query *)p;
339
359
  BooleanQuery *bq = (BooleanQuery *)self->data;
340
360
  int i;
341
- if (self->destroy_all) {
342
- for (i = 0; i < bq->clause_cnt; i++) {
343
- bc_destroy(bq->clauses[i]);
344
- }
361
+ for (i = 0; i < bq->clause_cnt; i++) {
362
+ bc_deref(bq->clauses[i]);
345
363
  }
346
364
  free(bq->clauses);
347
365
  if (bq->similarity) {
348
366
  bq->similarity->destroy(bq->similarity);
349
367
  }
350
368
  free(bq);
351
- q_destroy(self);
369
+ q_destroy_i(self);
352
370
  }
353
371
 
354
372
  float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
@@ -360,27 +378,50 @@ Similarity *bq_get_similarity(Query *self, Searcher *searcher)
360
378
  {
361
379
  BooleanQuery *bq = (BooleanQuery *)self->data;
362
380
  if (!bq->similarity) {
363
- Similarity *sim = q_get_similarity(self, searcher);
381
+ Similarity *sim = q_get_similarity_i(self, searcher);
364
382
  bq->similarity = ALLOC(Similarity);
365
383
  memcpy(bq->similarity, sim, sizeof(Similarity));
366
384
  bq->similarity->coord = &bq_coord_disabled;
367
- bq->similarity->destroy = &free;
385
+ bq->similarity->destroy = (void (*)(Similarity *))&free;
368
386
  }
369
387
 
370
388
  return bq->similarity;
371
389
  }
372
390
 
391
+ static uint bq_hash(Query *self)
392
+ {
393
+ int i;
394
+ uint hash = 0;
395
+ BooleanQuery *bq = (BooleanQuery *)self->data;
396
+ for (i = 0; i < bq->clause_cnt; i++) {
397
+ hash ^= bc_hash(bq->clauses[i]);
398
+ }
399
+ return (hash << 1) | bq->coord_disabled;
400
+ }
401
+
402
+ static int bq_eq(Query *self, Query *o)
403
+ {
404
+ int i;
405
+ BooleanQuery *bq1 = (BooleanQuery *)self->data;
406
+ BooleanQuery *bq2 = (BooleanQuery *)o->data;
407
+ if ((bq1->coord_disabled != bq2->coord_disabled) ||
408
+ (bq1->max_clause_cnt != bq1->max_clause_cnt) ||
409
+ (bq1->clause_cnt != bq2->clause_cnt)) {
410
+ return false;
411
+ }
412
+
413
+ for (i = 0; i < bq1->clause_cnt; i++) {
414
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
415
+ return false;
416
+ }
417
+ }
418
+ return true;
419
+ }
420
+
373
421
  Query *bq_create(bool coord_disabled)
374
422
  {
375
423
  Query *self = q_create();
376
424
  BooleanQuery *bq = ALLOC(BooleanQuery);
377
- self->type = BOOLEAN_QUERY;
378
- self->create_weight = &bw_create;
379
- self->rewrite = &bq_rewrite;
380
- self->extract_terms = &bq_extract_terms;
381
- self->to_s = &bq_to_s;
382
- self->destroy = &bq_destroy;
383
- self->data = bq;
384
425
  bq->coord_disabled = coord_disabled;
385
426
  if (coord_disabled) {
386
427
  self->get_similarity = &bq_get_similarity;
@@ -390,13 +431,24 @@ Query *bq_create(bool coord_disabled)
390
431
  bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
391
432
  bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
392
433
  bq->similarity = NULL;
434
+ bq->original_boost = 0.0;
435
+ self->data = bq;
393
436
 
437
+ self->type = BOOLEAN_QUERY;
438
+ self->rewrite = &bq_rewrite;
439
+ self->extract_terms = &bq_extract_terms;
440
+ self->to_s = &bq_to_s;
441
+ self->hash = &bq_hash;
442
+ self->eq = &bq_eq;
443
+ self->destroy_i = &bq_destroy;
444
+ self->create_weight_i = &bw_create;
394
445
  return self;
395
446
  }
396
447
 
397
448
  BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
398
449
  {
399
450
  BooleanQuery *bq = (BooleanQuery *)self->data;
451
+ if (!self->destroy_all) ref(bc);
400
452
  if (bq->clause_cnt >= bq->clause_capa) {
401
453
  bq->clause_capa *= 2;
402
454
  REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
@@ -412,7 +464,12 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
412
464
  BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
413
465
  {
414
466
  BooleanClause *bc = bc_create(sub_query, occur);
415
- return bq_add_clause(self, bc);
467
+ bq_add_clause(self, bc);
468
+ if (!self->destroy_all) {
469
+ ref(sub_query);
470
+ bc_deref(bc); /* bc would have been referenced unnecessarily */
471
+ }
472
+ return bc;
416
473
  }
417
474
 
418
475
  /***************************************************************************
@@ -572,16 +629,15 @@ Explanation *dssc_explain(Scorer *self, int doc_num)
572
629
  return e;
573
630
  }
574
631
 
575
- void dssc_destroy(void *p)
632
+ void dssc_destroy(Scorer *self)
576
633
  {
577
- Scorer *self = (Scorer *)p;
578
634
  DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
579
635
  int i;
580
636
  for (i = 0; i < dssc->ss_cnt; i++) {
581
637
  dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
582
638
  }
583
639
  if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
584
- scorer_destroy(self);
640
+ scorer_destroy_i(self);
585
641
  }
586
642
 
587
643
  Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
@@ -655,7 +711,8 @@ void csc_init(Scorer *self, bool init_scorers)
655
711
  ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
656
712
  Scorer *sub_scorer;
657
713
  int i;
658
- // compute coord factor
714
+
715
+ /* compute coord factor */
659
716
  csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
660
717
 
661
718
  csc->more = (csc->ss_cnt > 0);
@@ -746,16 +803,15 @@ bool csc_skip_to(Scorer *self, int doc_num)
746
803
  return csc_do_next(self);
747
804
  }
748
805
 
749
- void csc_destroy(void *p)
806
+ void csc_destroy(Scorer *self)
750
807
  {
751
- Scorer *self = (Scorer *)p;
752
808
  ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
753
809
  int i;
754
810
  for (i = 0; i < csc->ss_cnt; i++) {
755
811
  csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
756
812
  }
757
813
  free(csc->sub_scorers);
758
- scorer_destroy(self);
814
+ scorer_destroy_i(self);
759
815
  }
760
816
 
761
817
  Scorer *conjunction_scorer_create(Similarity *similarity)
@@ -842,12 +898,11 @@ Explanation *smsc_explain(Scorer *self, int doc_num)
842
898
  return scorer->explain(scorer, doc_num);
843
899
  }
844
900
 
845
- void smsc_destroy(void *p)
901
+ void smsc_destroy(Scorer *self)
846
902
  {
847
- Scorer *self = (Scorer *)p;
848
903
  Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
849
904
  scorer->destroy(scorer);
850
- scorer_destroy(self);
905
+ scorer_destroy_i(self);
851
906
  }
852
907
 
853
908
  Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
@@ -929,13 +984,12 @@ Explanation *rossc_explain(Scorer *self, int doc_num)
929
984
  return e;
930
985
  }
931
986
 
932
- void rossc_destroy(void *p)
987
+ void rossc_destroy(Scorer *self)
933
988
  {
934
- Scorer *self = (Scorer *)p;
935
989
  ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
936
990
  if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
937
991
  if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
938
- scorer_destroy(self);
992
+ scorer_destroy_i(self);
939
993
  }
940
994
 
941
995
 
@@ -1070,13 +1124,12 @@ Explanation *rxsc_explain(Scorer *self, int doc_num)
1070
1124
  return e;
1071
1125
  }
1072
1126
 
1073
- void rxsc_destroy(void *p)
1127
+ void rxsc_destroy(Scorer *self)
1074
1128
  {
1075
- Scorer *self = (Scorer *)p;
1076
1129
  ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1077
1130
  if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
1078
1131
  if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
1079
- scorer_destroy(self);
1132
+ scorer_destroy_i(self);
1080
1133
  }
1081
1134
 
1082
1135
  Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
@@ -1288,9 +1341,8 @@ bool bsc_skip_to(Scorer *self, int doc_num)
1288
1341
  }
1289
1342
  }
1290
1343
 
1291
- void bsc_destroy(void *p)
1344
+ void bsc_destroy(Scorer *self)
1292
1345
  {
1293
- Scorer *self = (Scorer *)p;
1294
1346
  BooleanScorer *bsc = (BooleanScorer *)self->data;
1295
1347
  Coordinator *coord = bsc->coordinator;
1296
1348
 
@@ -1316,7 +1368,7 @@ void bsc_destroy(void *p)
1316
1368
  free(bsc->required_scorers);
1317
1369
  free(bsc->optional_scorers);
1318
1370
  free(bsc->prohibited_scorers);
1319
- scorer_destroy(self);
1371
+ scorer_destroy_i(self);
1320
1372
  }
1321
1373
 
1322
1374
  Explanation *bsc_explain(Scorer *self, int doc_num)