sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,1741 @@
1
+ #include <string.h>
2
+ #include <limits.h>
3
+ #include "search.h"
4
+ #include "array.h"
5
+
6
+ /***************************************************************************
7
+ *
8
+ * Explanation
9
+ *
10
+ ***************************************************************************/
11
+
12
+ Explanation *expl_new(float value, const char *description, ...)
13
+ {
14
+ Explanation *expl = ALLOC(Explanation);
15
+
16
+ va_list args;
17
+ va_start(args, description);
18
+ expl->description = vstrfmt(description, args);
19
+ va_end(args);
20
+
21
+ expl->value = value;
22
+ expl->details = ary_new_type_capa(Explanation *,
23
+ EXPLANATION_DETAILS_START_SIZE);
24
+ return expl;
25
+ }
26
+
27
+ void expl_destroy(Explanation *expl)
28
+ {
29
+ ary_destroy((void **)expl->details, (free_ft)expl_destroy);
30
+ free(expl->description);
31
+ free(expl);
32
+ }
33
+
34
+ Explanation *expl_add_detail(Explanation *expl, Explanation *detail)
35
+ {
36
+ ary_push(expl->details, detail);
37
+ return expl;
38
+ }
39
+
40
+ char *expl_to_s_depth(Explanation *expl, int depth)
41
+ {
42
+ int i;
43
+ char *buffer = ALLOC_N(char, depth * 2 + 1);
44
+ const int num_details = ary_size(expl->details);
45
+
46
+ memset(buffer, ' ', sizeof(char) * depth * 2);
47
+ buffer[depth*2] = 0;
48
+
49
+ buffer = estrcat(buffer, strfmt("%f = %s\n", expl->value, expl->description));
50
+ for (i = 0; i < num_details; i++) {
51
+ buffer = estrcat(buffer, expl_to_s_depth(expl->details[i], depth + 1));
52
+ }
53
+
54
+ return buffer;
55
+ }
56
+
57
+ char *expl_to_html(Explanation *expl)
58
+ {
59
+ int i;
60
+ char *buffer;
61
+ const int num_details = ary_size(expl->details);
62
+
63
+ buffer = strfmt("<ul>\n<li>%f = %s</li>\n", expl->value, expl->description);
64
+
65
+ for (i = 0; i < num_details; i++) {
66
+ estrcat(buffer, expl_to_html(expl->details[i]));
67
+ }
68
+
69
+ REALLOC_N(buffer, char, strlen(buffer) + 10);
70
+ return strcat(buffer, "</ul>\n");
71
+ }
72
+
73
+ /***************************************************************************
74
+ *
75
+ * Hit
76
+ *
77
+ ***************************************************************************/
78
+
79
+ static bool hit_less_than(const Hit *hit1, const Hit *hit2)
80
+ {
81
+ if (hit1->score == hit2->score) {
82
+ return hit1->doc > hit2->doc;
83
+ }
84
+ else {
85
+ return hit1->score < hit1->score;
86
+ }
87
+ }
88
+
89
+ static bool hit_lt(Hit *hit1, Hit *hit2)
90
+ {
91
+ if (hit1->score == hit2->score) {
92
+ return hit1->doc > hit2->doc;
93
+ }
94
+ else {
95
+ return hit1->score < hit2->score;
96
+ }
97
+ }
98
+
99
+ static void hit_pq_down(PriorityQueue *pq)
100
+ {
101
+ register int i = 1;
102
+ register int j = 2; /* i << 1; */
103
+ register int k = 3; /* j + 1; */
104
+ Hit **heap = (Hit **)pq->heap;
105
+ Hit *node = heap[i]; /* save top node */
106
+
107
+ if ((k <= pq->size) && hit_lt(heap[k], heap[j])) {
108
+ j = k;
109
+ }
110
+
111
+ while ((j <= pq->size) && hit_lt(heap[j], node)) {
112
+ heap[i] = heap[j]; /* shift up child */
113
+ i = j;
114
+ j = i << 1;
115
+ k = j + 1;
116
+ if ((k <= pq->size) && hit_lt(heap[k], heap[j])) {
117
+ j = k;
118
+ }
119
+ }
120
+ heap[i] = node;
121
+ }
122
+
123
+ static Hit *hit_pq_pop(PriorityQueue *pq)
124
+ {
125
+ if (pq->size > 0) {
126
+ Hit **heap = (Hit **)pq->heap;
127
+ Hit *result = heap[1]; /* save first value */
128
+ heap[1] = heap[pq->size]; /* move last to first */
129
+ heap[pq->size] = NULL;
130
+ pq->size--;
131
+ hit_pq_down(pq); /* adjust heap */
132
+ return result;
133
+ }
134
+ else {
135
+ return NULL;
136
+ }
137
+ }
138
+
139
+ static void hit_pq_up(PriorityQueue *pq)
140
+ {
141
+ Hit **heap = (Hit **)pq->heap;
142
+ Hit *node;
143
+ int i = pq->size;
144
+ int j = i >> 1;
145
+ node = heap[i];
146
+
147
+ while ((j > 0) && hit_lt(node, heap[j])) {
148
+ heap[i] = heap[j];
149
+ i = j;
150
+ j = j >> 1;
151
+ }
152
+ heap[i] = node;
153
+ }
154
+
155
+ static void hit_pq_insert(PriorityQueue *pq, Hit *hit)
156
+ {
157
+ if (pq->size < pq->capa) {
158
+ Hit *new_hit = ALLOC(Hit);
159
+ memcpy(new_hit, hit, sizeof(Hit));
160
+ pq->size++;
161
+ if (pq->size >= pq->mem_capa) {
162
+ pq->mem_capa <<= 1;
163
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
164
+ }
165
+ pq->heap[pq->size] = new_hit;
166
+ hit_pq_up(pq);
167
+ }
168
+ else if (pq->size > 0 && hit_lt((Hit *)pq->heap[1], hit)) {
169
+ memcpy(pq->heap[1], hit, sizeof(Hit));
170
+ hit_pq_down(pq);
171
+ }
172
+ }
173
+
174
+ static void hit_pq_multi_insert(PriorityQueue *pq, Hit *hit)
175
+ {
176
+ hit_pq_insert(pq, hit);
177
+ free(hit);
178
+ }
179
+
180
+ /***************************************************************************
181
+ *
182
+ * TopDocs
183
+ *
184
+ ***************************************************************************/
185
+
186
+ TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score)
187
+ {
188
+ TopDocs *td = ALLOC(TopDocs);
189
+ td->total_hits = total_hits;
190
+ td->size = size;
191
+ td->hits = hits;
192
+ td->max_score = max_score;
193
+ return td;
194
+ }
195
+
196
+ void td_destroy(TopDocs *td)
197
+ {
198
+ int i;
199
+
200
+ for (i = 0; i < td->size; i++) {
201
+ free(td->hits[i]);
202
+ }
203
+ free(td->hits);
204
+ free(td);
205
+ }
206
+
207
+ char *td_to_s(TopDocs *td)
208
+ {
209
+ int i;
210
+ Hit *hit;
211
+ char *buffer = strfmt("%d hits sorted by <score, doc_num>\n",
212
+ td->total_hits);
213
+ for (i = 0; i < td->size; i++) {
214
+ hit = td->hits[i];
215
+ estrcat(buffer, strfmt("\t%d:%f\n", hit->doc, hit->score));
216
+ }
217
+ return buffer;
218
+ }
219
+
220
+ /***************************************************************************
221
+ *
222
+ * Weight
223
+ *
224
+ ***************************************************************************/
225
+
226
+ Query *w_get_query(Weight *self)
227
+ {
228
+ return self->query;
229
+ }
230
+
231
+ float w_get_value(Weight *self)
232
+ {
233
+ return self->value;
234
+ }
235
+
236
+ float w_sum_of_squared_weights(Weight *self)
237
+ {
238
+ self->qweight = self->idf * self->query->boost;
239
+ return self->qweight * self->qweight; /* square it */
240
+ }
241
+
242
+ void w_normalize(Weight *self, float normalization_factor)
243
+ {
244
+ self->qnorm = normalization_factor;
245
+ self->qweight *= normalization_factor; /* normalize query weight */
246
+ self->value = self->qweight * self->idf;/* idf for document */
247
+ }
248
+
249
+ void w_destroy(Weight *self)
250
+ {
251
+ q_deref(self->query);
252
+ free(self);
253
+ }
254
+
255
+ Weight *w_create(size_t size, Query *query)
256
+ {
257
+ Weight *self = (Weight *)ecalloc(size);
258
+ #ifdef DEBUG
259
+ if (size < sizeof(Weight)) {
260
+ RAISE(FERRET_ERROR, "size of weight <%d> should be at least <%d>",
261
+ (int)size, (int)sizeof(Weight));
262
+ }
263
+ #endif
264
+ REF(query);
265
+ self->query = query;
266
+ self->get_query = &w_get_query;
267
+ self->get_value = &w_get_value;
268
+ self->normalize = &w_normalize;
269
+ self->destroy = &w_destroy;
270
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
271
+ return self;
272
+ }
273
+
274
+ /***************************************************************************
275
+ *
276
+ * Query
277
+ *
278
+ ***************************************************************************/
279
+
280
/*
 * Printable names of the query types. q_get_query_name indexes this table
 * with an enum QUERY_TYPE value, so the order presumably has to mirror the
 * enum declaration (not visible in this file -- confirm in search.h).
 */
static const char *QUERY_NAMES[] = {
    "TermQuery",
    "MultiTermQuery",
    "BooleanQuery",
    "PhraseQuery",
    "ConstantScoreQuery",
    "FilteredQuery",
    "MatchAllQuery",
    "RangeQuery",
    "WildCardQuery",
    "FuzzyQuery",
    "PrefixQuery",
    "SpanTermQuery",
    "SpanMultiTermQuery",
    "SpanPrefixQuery",
    "SpanFirstQuery",
    "SpanOrQuery",
    "SpanNotQuery",
    "SpanNearQuery"
};

/*
 * Name returned for a type outside the table.
 * NOTE(review): the string is misspelled ("Unkown"); it is left unchanged
 * because it is visible to callers at runtime.
 */
static const char *UNKNOWN_QUERY_NAME = "UnkownQuery";
302
+
303
+ const char *q_get_query_name(enum QUERY_TYPE type) {
304
+ if (type >= NELEMS(QUERY_NAMES)) {
305
+ return UNKNOWN_QUERY_NAME;
306
+ }
307
+ else {
308
+ return QUERY_NAMES[type];
309
+ }
310
+ }
311
+
312
+ static Query *q_rewrite(Query *self, IndexReader *ir)
313
+ {
314
+ (void)ir;
315
+ self->ref_cnt++;
316
+ return self;
317
+ }
318
+
319
+ static void q_extract_terms(Query *self, HashSet *terms)
320
+ {
321
+ /* do nothing by default */
322
+ (void)self;
323
+ (void)terms;
324
+ }
325
+
326
+ Similarity *q_get_similarity_i(Query *self, Searcher *searcher)
327
+ {
328
+ (void)self;
329
+ return searcher->get_similarity(searcher);
330
+ }
331
+
332
+ void q_destroy_i(Query *self)
333
+ {
334
+ free(self);
335
+ }
336
+
337
+ void q_deref(Query *self)
338
+ {
339
+ if (--(self->ref_cnt) == 0) {
340
+ self->destroy_i(self);
341
+ }
342
+ }
343
+
344
+ Weight *q_create_weight_unsup(Query *self, Searcher *searcher)
345
+ {
346
+ (void)self;
347
+ (void)searcher;
348
+ RAISE(UNSUPPORTED_ERROR,
349
+ "Create weight is unsupported for this type of query");
350
+ return NULL;
351
+ }
352
+
353
+ Weight *q_weight(Query *self, Searcher *searcher)
354
+ {
355
+ Query *query = searcher->rewrite(searcher, self);
356
+ Weight *weight = query->create_weight_i(query, searcher);
357
+ float sum = weight->sum_of_squared_weights(weight);
358
+ Similarity *sim = query->get_similarity(query, searcher);
359
+ float norm = sim_query_norm(sim, sum);
360
+ q_deref(query);
361
+
362
+ weight->normalize(weight, norm);
363
+ return self->weight = weight;
364
+ }
365
+
366
+ #define BQ(query) ((BooleanQuery *)(query))
367
+ Query *q_combine(Query **queries, int q_cnt)
368
+ {
369
+ int i;
370
+ Query *q, *ret_q;
371
+ HashSet *uniques = hs_new((hash_ft)&q_hash, (eq_ft)&q_eq, NULL);
372
+
373
+ for (i = 0; i < q_cnt; i++) {
374
+ q = queries[i];
375
+ if (q->type == BOOLEAN_QUERY) {
376
+ int j;
377
+ bool splittable = true;
378
+ if (BQ(q)->coord_disabled == false) {
379
+ splittable = false;
380
+ }
381
+ else {
382
+ for (j = 0; j < BQ(q)->clause_cnt; j++) {
383
+ if (BQ(q)->clauses[j]->occur != BC_SHOULD) {
384
+ splittable = false;
385
+ break;
386
+ }
387
+ }
388
+ }
389
+ if (splittable) {
390
+ for (j = 0; j < BQ(q)->clause_cnt; j++) {
391
+ Query *sub_q = BQ(q)->clauses[j]->query;
392
+ hs_add(uniques, sub_q);
393
+ }
394
+ }
395
+ else {
396
+ hs_add(uniques, q);
397
+ }
398
+ }
399
+ else {
400
+ hs_add(uniques, q);
401
+ }
402
+ }
403
+ if (uniques->size == 1) {
404
+ ret_q = (Query *)uniques->elems[0];
405
+ REF(ret_q);
406
+ }
407
+ else {
408
+ ret_q = bq_new(true);
409
+ for (i = 0; i < uniques->size; i++) {
410
+ q = (Query *)uniques->elems[i];
411
+ bq_add_query(ret_q, q, BC_SHOULD);
412
+ }
413
+ }
414
+ hs_destroy(uniques);
415
+
416
+ return ret_q;
417
+ }
418
+
419
+ unsigned long q_hash(Query *self)
420
+ {
421
+ return (self->hash(self) << 5) | self->type;
422
+ }
423
+
424
+ int q_eq(Query *self, Query *o)
425
+ {
426
+ return (self == o)
427
+ || ((self->type == o->type)
428
+ && (self->boost == o->boost)
429
+ && self->eq(self, o));
430
+ }
431
+
432
+ static MatchVector *q_get_matchv_i(Query *self, MatchVector *mv, TermVector *tv)
433
+ {
434
+ /* be default we don't add any matches */
435
+ (void)self; (void)tv;
436
+ return mv;
437
+ }
438
+
439
+ Query *q_create(size_t size)
440
+ {
441
+ Query *self = (Query *)ecalloc(size);
442
+ #ifdef DEBUG
443
+ if (size < sizeof(Query)) {
444
+ RAISE(FERRET_ERROR, "Size of a query <%d> should never be smaller than the "
445
+ "size of a Query struct <%d>", (int)size, (int)sizeof(Query));
446
+ }
447
+ #endif
448
+ self->boost = 1.0;
449
+ self->rewrite = &q_rewrite;
450
+ self->get_similarity = &q_get_similarity_i;
451
+ self->extract_terms = &q_extract_terms;
452
+ self->get_matchv_i = &q_get_matchv_i;
453
+ self->weight = NULL;
454
+ self->ref_cnt = 1;
455
+ return self;
456
+ }
457
+
458
+ /***************************************************************************
459
+ *
460
+ * Scorer
461
+ *
462
+ ***************************************************************************/
463
+
464
+ void scorer_destroy_i(Scorer *scorer)
465
+ {
466
+ free(scorer);
467
+ }
468
+
469
+ Scorer *scorer_create(size_t size, Similarity *similarity)
470
+ {
471
+ Scorer *self = (Scorer *)ecalloc(size);
472
+ #ifdef DEBUG
473
+ if (size < sizeof(Scorer)) {
474
+ RAISE(FERRET_ERROR, "size of scorer <%d> should be at least <%d>",
475
+ (int)size, (int)sizeof(Scorer));
476
+ }
477
+ #endif
478
+ self->destroy = &scorer_destroy_i;
479
+ self->similarity = similarity;
480
+ return self;
481
+ }
482
+
483
+ bool scorer_less_than(void *p1, void *p2)
484
+ {
485
+ Scorer *s1 = (Scorer *)p1;
486
+ Scorer *s2 = (Scorer *)p2;
487
+ return s1->score(s1) < s2->score(s2);
488
+ }
489
+
490
+ bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2)
491
+ {
492
+ return s1->doc < s2->doc;
493
+ }
494
+
495
+ int scorer_doc_cmp(const void *p1, const void *p2)
496
+ {
497
+ return (*(Scorer **)p1)->doc - (*(Scorer **)p2)->doc;
498
+ }
499
+
500
+ /***************************************************************************
501
+ *
502
+ * Highlighter
503
+ *
504
+ ***************************************************************************/
505
+
506
+ /* ** MatchRange ** */
507
+ static int match_range_cmp(const void *p1, const void *p2)
508
+ {
509
+ int diff = ((MatchRange *)p1)->start - ((MatchRange *)p2)->start;
510
+ if (diff != 0) {
511
+ return diff;
512
+ }
513
+ else {
514
+ return ((MatchRange *)p2)->end - ((MatchRange *)p1)->end;
515
+ }
516
+ }
517
+
518
+
519
+
520
+ /* ** MatchVector ** */
521
+ MatchVector *matchv_new()
522
+ {
523
+ MatchVector *matchv = ALLOC(MatchVector);
524
+
525
+ matchv->size = 0;
526
+ matchv->capa = MATCH_VECTOR_INIT_CAPA;
527
+ matchv->matches = ALLOC_N(MatchRange, MATCH_VECTOR_INIT_CAPA);
528
+
529
+ return matchv;
530
+ }
531
+
532
+ MatchVector *matchv_add(MatchVector *self, int start, int end)
533
+ {
534
+ if (self->size >= self->capa) {
535
+ self->capa <<= 1;
536
+ REALLOC_N(self->matches, MatchRange, self->capa);
537
+ }
538
+ self->matches[self->size].start = start;
539
+ self->matches[self->size].end = end;
540
+ self->matches[self->size++].score = 1.0;
541
+ return self;
542
+ }
543
+
544
+ MatchVector *matchv_sort(MatchVector *self)
545
+ {
546
+ qsort(self->matches, self->size, sizeof(MatchRange), &match_range_cmp);
547
+ return self;
548
+ }
549
+
550
+ MatchVector *matchv_compact(MatchVector *self)
551
+ {
552
+ int left, right;
553
+ matchv_sort(self);
554
+ for (right = left = 0; right < self->size; right++) {
555
+ /* Note the end + 1. This compacts a range 3:5 and 6:8 inleft 3:8 */
556
+ if (self->matches[right].start > self->matches[left].end + 1) {
557
+ left++;
558
+ self->matches[left].start = self->matches[right].start;
559
+ self->matches[left].end = self->matches[right].end;
560
+ self->matches[left].score = self->matches[right].score;
561
+ }
562
+ else if (self->matches[right].end > self->matches[left].end) {
563
+ self->matches[left].end = self->matches[right].end;
564
+ }
565
+ else {
566
+ self->matches[left].score += self->matches[right].score;
567
+ }
568
+ }
569
+ self->size = left + 1;
570
+ return self;
571
+ }
572
+
573
+ MatchVector *matchv_compact_with_breaks(MatchVector *self)
574
+ {
575
+ int left, right;
576
+ matchv_sort(self);
577
+ for (right = left = 0; right < self->size; right++) {
578
+ /* Note: no end + 1. Unlike above won't compact ranges 3:5 and 6:8 */
579
+ if (self->matches[right].start > self->matches[left].end) {
580
+ left++;
581
+ self->matches[left].start = self->matches[right].start;
582
+ self->matches[left].end = self->matches[right].end;
583
+ self->matches[left].score = self->matches[right].score;
584
+ }
585
+ else if (self->matches[right].end > self->matches[left].end) {
586
+ self->matches[left].end = self->matches[right].end;
587
+ self->matches[left].score += self->matches[right].score;
588
+ }
589
+ else if (right > left) {
590
+ self->matches[left].score += self->matches[right].score;
591
+ }
592
+ }
593
+ self->size = left + 1;
594
+ return self;
595
+ }
596
+
597
+
598
+ static MatchVector *matchv_set_offsets(MatchVector *mv, Offset *offsets)
599
+ {
600
+ int i;
601
+ for (i = 0; i < mv->size; i++) {
602
+ mv->matches[i].start_offset = offsets[mv->matches[i].start].start;
603
+ mv->matches[i].end_offset = offsets[mv->matches[i].end].end;
604
+ }
605
+ return mv;
606
+ }
607
+
608
+ void matchv_destroy(MatchVector *self)
609
+ {
610
+ free(self->matches);
611
+ free(self);
612
+ }
613
+
614
+ /***************************************************************************
615
+ *
616
+ * Searcher
617
+ *
618
+ ***************************************************************************/
619
+
620
+ MatchVector *searcher_get_match_vector(Searcher *self,
621
+ Query *query,
622
+ const int doc_num,
623
+ const char *field)
624
+ {
625
+ MatchVector *mv = matchv_new();
626
+ bool rewrite = query->get_matchv_i == q_get_matchv_i;
627
+ TermVector *tv = self->get_term_vector(self, doc_num, field);
628
+ if (rewrite) {
629
+ query = self->rewrite(self, query);
630
+ }
631
+ if (tv && tv->term_cnt > 0 && tv->terms[0].positions != NULL) {
632
+ mv = query->get_matchv_i(query, mv, tv);
633
+ tv_destroy(tv);
634
+ }
635
+ if (rewrite) {
636
+ q_deref(query);
637
+ }
638
+ return mv;
639
+ }
640
+
641
/*
 * A candidate excerpt for highlighting: a run of consecutive match ranges
 * of a MatchVector plus the text window built around them.
 */
typedef struct Excerpt
{
    int start;          /* index of the first match range (mv->matches) */
    int end;            /* index of the last match range, inclusive */
    int start_pos;      /* index into the term vector's offsets: window start */
    int end_pos;        /* index into the term vector's offsets: window end */
    int start_offset;   /* offset in the field data where the window starts */
    int end_offset;     /* offset in the field data where the window ends */
    double score;       /* sum of the scores of the covered match ranges */
} Excerpt;
651
+
652
+ /*
653
+ static int excerpt_cmp(const void *p1, const void *p2)
654
+ {
655
+ double score1 = (*((Excerpt **)p1))->score;
656
+ double score2 = (*((Excerpt **)p2))->score;
657
+ if (score1 > score2) return 1;
658
+ if (score1 < score2) return -1;
659
+ return 0;
660
+ }
661
+ */
662
+
663
+ static int excerpt_start_cmp(const void *p1, const void *p2)
664
+ {
665
+ return (*((Excerpt **)p1))->start - (*((Excerpt **)p2))->start;
666
+ }
667
+
668
+ static int excerpt_lt(Excerpt *e1, Excerpt *e2)
669
+ {
670
+ return e1->score > e2->score; /* want the highest score at top */
671
+ }
672
+
673
+ static Excerpt *excerpt_new(int start, int end, double score)
674
+ {
675
+ Excerpt *excerpt = ALLOC_AND_ZERO(Excerpt);
676
+ excerpt->start = start;
677
+ excerpt->end = end;
678
+ excerpt->score = score;
679
+ return excerpt;
680
+ }
681
+
682
+ static Excerpt *excerpt_recalc_score(Excerpt *e, MatchVector *mv)
683
+ {
684
+ int i;
685
+ double score = 0.0;
686
+ for (i = e->start; i <= e->end; i++) {
687
+ score += mv->matches[i].score;
688
+ }
689
+ e->score = score;
690
+ return e;
691
+ }
692
+
693
+ /* expand an excerpt to it's largest possible size */
694
+ static Excerpt *excerpt_expand(Excerpt *e, const int len, TermVector *tv)
695
+ {
696
+ Offset *offsets = tv->offsets;
697
+ int offset_cnt = tv->offset_cnt;
698
+ bool did_expansion = true;
699
+ int i;
700
+ /* fill in skipped offsets */
701
+ for (i = 1; i < offset_cnt; i++) {
702
+ if (offsets[i].start == 0) {
703
+ offsets[i].start = offsets[i-1].start;
704
+ }
705
+ if (offsets[i].end == 0) {
706
+ offsets[i].end = offsets[i-1].end;
707
+ }
708
+ }
709
+
710
+ while (did_expansion) {
711
+ did_expansion = false;
712
+ if (e->start_pos > 0
713
+ && (e->end_offset - offsets[e->start_pos - 1].start) < len) {
714
+ e->start_pos--;
715
+ e->start_offset = offsets[e->start_pos].start;
716
+ did_expansion = true;
717
+ }
718
+ if (e->end_pos < (offset_cnt - 1)
719
+ && (offsets[e->end_pos + 1].end - e->start_offset) < len) {
720
+ e->end_pos++;
721
+ e->end_offset = offsets[e->end_pos].end;
722
+ did_expansion = true;
723
+ }
724
+ }
725
+ return e;
726
+ }
727
+
728
+ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
729
+ LazyDocField *lazy_df,
730
+ const char *pre_tag,
731
+ const char *post_tag,
732
+ const char *ellipsis)
733
+ {
734
+ int i, len;
735
+ int last_offset = e->start_offset;
736
+ const int num_matches = e->end - e->start + 1;
737
+ const int pre_tag_len = (int)strlen(pre_tag);
738
+ const int post_tag_len = (int)strlen(post_tag);
739
+ const int ellipsis_len = (int)strlen(ellipsis);
740
+ char *excerpt_str = ALLOC_N(char,
741
+ 10 + e->end_offset - e->start_offset
742
+ + (num_matches * (pre_tag_len + post_tag_len))
743
+ + (2 * ellipsis_len));
744
+ char *e_ptr = excerpt_str;
745
+ if (e->start_offset > 0) {
746
+ memcpy(e_ptr, ellipsis, ellipsis_len);
747
+ e_ptr += ellipsis_len;
748
+ }
749
+ for (i = e->start; i <= e->end; i++) {
750
+ MatchRange *mr = mv->matches + i;
751
+ len = mr->start_offset - last_offset;
752
+ if (len) {
753
+ lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
754
+ e_ptr += len;
755
+ }
756
+ memcpy(e_ptr, pre_tag, pre_tag_len);
757
+ e_ptr += pre_tag_len;
758
+ len = mr->end_offset - mr->start_offset;
759
+ if (len) {
760
+ lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
761
+ e_ptr += len;
762
+ }
763
+ memcpy(e_ptr, post_tag, post_tag_len);
764
+ e_ptr += post_tag_len;
765
+ last_offset = mr->end_offset;
766
+ }
767
+ if ((lazy_df->len - e->end_offset) <= ellipsis_len) {
768
+ /* no point using ellipsis if it takes up more space */
769
+ e->end_offset = lazy_df->len;
770
+ }
771
+ len = e->end_offset - last_offset;
772
+ if (len) {
773
+ lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
774
+ e_ptr += len;
775
+ }
776
+ if (e->end_offset < lazy_df->len) {
777
+ memcpy(e_ptr, ellipsis, ellipsis_len);
778
+ e_ptr += ellipsis_len;
779
+ }
780
+ *e_ptr = '\0';
781
+ return excerpt_str;
782
+ }
783
+
784
+ static char *highlight_field(MatchVector *mv,
785
+ LazyDocField *lazy_df,
786
+ TermVector *tv,
787
+ const char *pre_tag,
788
+ const char *post_tag)
789
+ {
790
+ const int pre_len = (int)strlen(pre_tag);
791
+ const int post_len = (int)strlen(post_tag);
792
+ char *excerpt_str =
793
+ ALLOC_N(char, 10 + lazy_df->len + (mv->size * (pre_len + post_len)));
794
+ if (mv->size > 0) {
795
+ int last_offset = 0;
796
+ int i, len;
797
+ char *e_ptr = excerpt_str;
798
+ matchv_compact_with_breaks(mv);
799
+ matchv_set_offsets(mv, tv->offsets);
800
+ for (i = 0; i < mv->size; i++) {
801
+ MatchRange *mr = mv->matches + i;
802
+ len = mr->start_offset - last_offset;
803
+ if (len) {
804
+ lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
805
+ e_ptr += len;
806
+ }
807
+ memcpy(e_ptr, pre_tag, pre_len);
808
+ e_ptr += pre_len;
809
+ len = mr->end_offset - mr->start_offset;
810
+ if (len) {
811
+ lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
812
+ e_ptr += len;
813
+ }
814
+ memcpy(e_ptr, post_tag, post_len);
815
+ e_ptr += post_len;
816
+ last_offset = mr->end_offset;
817
+ }
818
+ len = lazy_df->len - last_offset;
819
+ if (len) {
820
+ lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
821
+ e_ptr += len;
822
+ }
823
+ *e_ptr = '\0';
824
+ }
825
+ else {
826
+ lazy_df_get_bytes(lazy_df, excerpt_str, 0, lazy_df->len);
827
+ excerpt_str[lazy_df->len] = '\0';
828
+ }
829
+ return excerpt_str;
830
+ }
831
+
832
+ char **searcher_highlight(Searcher *self,
833
+ Query *query,
834
+ const int doc_num,
835
+ const char *field,
836
+ const int excerpt_len,
837
+ const int num_excerpts,
838
+ const char *pre_tag,
839
+ const char *post_tag,
840
+ const char *ellipsis)
841
+ {
842
+ char **excerpt_strs = NULL;
843
+ TermVector *tv = self->get_term_vector(self, doc_num, field);
844
+ LazyDoc *lazy_doc = self->get_lazy_doc(self, doc_num);
845
+ LazyDocField *lazy_df = NULL;
846
+ if (lazy_doc) {
847
+ lazy_df = h_get(lazy_doc->field_dict, field);
848
+ }
849
+ if (tv && lazy_df && tv->term_cnt > 0 && tv->terms[0].positions != NULL
850
+ && tv->offsets != NULL) {
851
+ MatchVector *mv;
852
+ query = self->rewrite(self, query);
853
+ mv = query->get_matchv_i(query, matchv_new(), tv);
854
+ q_deref(query);
855
+ if (lazy_df->len < (excerpt_len * num_excerpts)) {
856
+ excerpt_strs = ary_new_type_capa(char *, 1);
857
+ ary_push(excerpt_strs,
858
+ highlight_field(mv, lazy_df, tv, pre_tag, post_tag));
859
+ }
860
+ else if (mv->size > 0) {
861
+ Excerpt **excerpts = ALLOC_AND_ZERO_N(Excerpt *, num_excerpts);
862
+ int e_start, e_end, i, j;
863
+ MatchRange *matches = mv->matches;
864
+ double running_score = 0.0;
865
+ Offset *offsets = tv->offsets;
866
+ PriorityQueue *excerpt_pq;
867
+
868
+ matchv_compact_with_breaks(mv);
869
+ matchv_set_offsets(mv, offsets);
870
+ excerpt_pq = pq_new(mv->size, (lt_ft)&excerpt_lt, &free);
871
+ /* add all possible excerpts to the priority queue */
872
+
873
+ for (e_start = e_end = 0; e_start < mv->size; e_start++) {
874
+ const int start_offset = matches[e_start].start_offset;
875
+ if (e_start > e_end) {
876
+ running_score = 0.0;
877
+ e_end = e_start;
878
+ }
879
+ while (e_end < mv->size && (matches[e_end].end_offset
880
+ <= start_offset + excerpt_len)) {
881
+ running_score += matches[e_end].score;
882
+ e_end++;
883
+ }
884
+ pq_push(excerpt_pq,
885
+ excerpt_new(e_start, e_end - 1, running_score));
886
+ /* - 0.1 so that earlier matches take priority */
887
+ running_score -= matches[e_start].score;
888
+ }
889
+
890
+ for (i = 0; i < num_excerpts && excerpt_pq->size > 0; i++) {
891
+ excerpts[i] = pq_pop(excerpt_pq);
892
+ if (i < num_excerpts - 1) {
893
+ /* set match ranges alread included to 0 */
894
+ Excerpt *e = excerpts[i];
895
+ for (j = e->start; j <= e->end; j++) {
896
+ matches[j].score = 0.0;
897
+ }
898
+ e = NULL;
899
+ while (e != (Excerpt *)pq_top(excerpt_pq)) {
900
+ e = pq_top(excerpt_pq);
901
+ excerpt_recalc_score(e, mv);
902
+ pq_down(excerpt_pq);
903
+ }
904
+ }
905
+ }
906
+
907
+ qsort(excerpts, i, sizeof(Excerpt *), &excerpt_start_cmp);
908
+ for (j = 0; j < i; j++) {
909
+ Excerpt *e = excerpts[j];
910
+ e->start_pos = matches[e->start].start;
911
+ e->end_pos = matches[e->end].end;
912
+ e->start_offset = offsets[e->start_pos].start;
913
+ e->end_offset = offsets[e->end_pos].end;
914
+ }
915
+
916
+ if (i < num_excerpts) {
917
+ const int diff = num_excerpts - i;
918
+ memmove(excerpts + (diff), excerpts,
919
+ i * sizeof(Excerpt *));
920
+ for (j = 0; j < diff; j++) {
921
+ /* these new excerpts will grow into one long excerpt at
922
+ * the start */
923
+ excerpts[j] = ALLOC_AND_ZERO(Excerpt);
924
+ excerpts[j]->end = -1;
925
+ }
926
+ }
927
+
928
+ excerpt_strs = ary_new_type_capa(char *, num_excerpts);
929
+ /* merge excerpts where possible */
930
+ for (i = 0; i < num_excerpts;) {
931
+ Excerpt *ei = excerpts[i];
932
+ int merged = 1; /* 1 means a single excerpt, ie no merges */
933
+ for (j = i + 1; j < num_excerpts; j++) {
934
+ Excerpt *ej = excerpts[j];
935
+ if ((ej->end_offset - ei->start_offset)
936
+ < (j - i + 1) * excerpt_len) {
937
+ ei->end = ej->end;
938
+ ei->end_pos = ej->end_pos;
939
+ ei->end_offset = ej->end_offset;
940
+ merged = j - i + 1;
941
+ }
942
+ }
943
+ excerpt_expand(ei, merged * excerpt_len, tv);
944
+ ary_push(excerpt_strs,
945
+ excerpt_get_str(ei, mv, lazy_df,
946
+ pre_tag, post_tag, ellipsis));
947
+ i += merged;
948
+ }
949
+ for (i = 0; i < num_excerpts; i++) {
950
+ free(excerpts[i]);
951
+ }
952
+ free(excerpts);
953
+ pq_destroy(excerpt_pq);
954
+ }
955
+ matchv_destroy(mv);
956
+ }
957
+ if (tv) tv_destroy(tv);
958
+ if (lazy_doc) lazy_doc_close(lazy_doc);
959
+ return excerpt_strs;
960
+ }
961
+
962
+ static Weight *sea_create_weight(Searcher *self, Query *query)
963
+ {
964
+ return q_weight(query, self);
965
+ }
966
+
967
+ static void sea_check_args(int num_docs, int first_doc)
968
+ {
969
+ if (num_docs <= 0) {
970
+ RAISE(ARG_ERROR, ":num_docs was set to %d but should be greater "
971
+ "than 0 : %d <= 0", num_docs, num_docs);
972
+ }
973
+
974
+ if (first_doc < 0) {
975
+ RAISE(ARG_ERROR, ":first_doc was set to %d but should be greater "
976
+ "than or equal to 0 : %d < 0", first_doc, first_doc);
977
+ }
978
+ }
979
+
980
+ static Similarity *sea_get_similarity(Searcher *self)
981
+ {
982
+ return self->similarity;
983
+ }
984
+
985
+ /***************************************************************************
986
+ *
987
+ * IndexSearcher
988
+ *
989
+ ***************************************************************************/
990
+
991
+ #define ISEA(searcher) ((IndexSearcher *)(searcher))
992
+
993
+ int isea_doc_freq(Searcher *self, const char *field, const char *term)
994
+ {
995
+ return ir_doc_freq(ISEA(self)->ir, field, term);
996
+ }
997
+
998
+ static Document *isea_get_doc(Searcher *self, int doc_num)
999
+ {
1000
+ IndexReader *ir = ISEA(self)->ir;
1001
+ return ir->get_doc(ir, doc_num);
1002
+ }
1003
+
1004
+ static LazyDoc *isea_get_lazy_doc(Searcher *self, int doc_num)
1005
+ {
1006
+ IndexReader *ir = ISEA(self)->ir;
1007
+ return ir->get_lazy_doc(ir, doc_num);
1008
+ }
1009
+
1010
+ static int isea_max_doc(Searcher *self)
1011
+ {
1012
+ IndexReader *ir = ISEA(self)->ir;
1013
+ return ir->max_doc(ir);
1014
+ }
1015
+
1016
/*
 * True when the scorer's current document must be skipped: either a filter
 * bit vector is given and the document's bit is not set, or a filter
 * function is given and it returns false for the document.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments cannot change the grouping. Arguments are still evaluated more
 * than once, so they must be free of side effects.
 */
#define IS_FILTERED(bits, filter_func, scorer, searcher) \
    (((bits) && !bv_get((bits), (scorer)->doc)) \
     || ((filter_func) \
         && !(filter_func)((scorer)->doc, (scorer)->score(scorer), \
                           (searcher))))
1020
+
1021
+ static TopDocs *isea_search_w(Searcher *self,
1022
+ Weight *weight,
1023
+ int first_doc,
1024
+ int num_docs,
1025
+ Filter *filter,
1026
+ Sort *sort,
1027
+ filter_ft filter_func,
1028
+ bool load_fields)
1029
+ {
1030
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1031
+ int i;
1032
+ Scorer *scorer;
1033
+ Hit **score_docs = NULL;
1034
+ Hit hit;
1035
+ int total_hits = 0;
1036
+ float score, max_score = 0.0;
1037
+ BitVector *bits = (filter
1038
+ ? filt_get_bv(filter, ISEA(self)->ir)
1039
+ : NULL);
1040
+ Hit *(*hq_pop)(PriorityQueue *pq);
1041
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
1042
+ void (*hq_destroy)(PriorityQueue *self);
1043
+ PriorityQueue *hq;
1044
+
1045
+ sea_check_args(num_docs, first_doc);
1046
+
1047
+ scorer = weight->scorer(weight, ISEA(self)->ir);
1048
+ if (!scorer || 0 == ISEA(self)->ir->num_docs(ISEA(self)->ir)) {
1049
+ if (scorer) scorer->destroy(scorer);
1050
+ return td_new(0, 0, NULL, 0.0);
1051
+ }
1052
+
1053
+ if (sort) {
1054
+ hq = fshq_pq_new(max_size, sort, ISEA(self)->ir);
1055
+ hq_insert = &fshq_pq_insert;
1056
+ hq_destroy = &fshq_pq_destroy;
1057
+ if (load_fields) {
1058
+ hq_pop = &fshq_pq_pop_fd;
1059
+ }
1060
+ else {
1061
+ hq_pop = &fshq_pq_pop;
1062
+ }
1063
+ }
1064
+ else {
1065
+ hq = pq_new(max_size, (lt_ft)&hit_less_than, &free);
1066
+ hq_pop = &hit_pq_pop;
1067
+ hq_insert = &hit_pq_insert;
1068
+ hq_destroy = &pq_destroy;
1069
+ }
1070
+
1071
+ while (scorer->next(scorer)) {
1072
+ if (IS_FILTERED(bits, filter_func, scorer, self)) {
1073
+ continue;
1074
+ }
1075
+ total_hits++;
1076
+ score = scorer->score(scorer);
1077
+ if (score > max_score) max_score = score;
1078
+ hit.doc = scorer->doc; hit.score = score;
1079
+ hq_insert(hq, &hit);
1080
+ }
1081
+ scorer->destroy(scorer);
1082
+
1083
+ if (hq->size > first_doc) {
1084
+ if ((hq->size - first_doc) < num_docs) {
1085
+ num_docs = hq->size - first_doc;
1086
+ }
1087
+ score_docs = ALLOC_N(Hit *, num_docs);
1088
+ for (i = num_docs - 1; i >= 0; i--) {
1089
+ score_docs[i] = hq_pop(hq);
1090
+ /*
1091
+ printf("score_docs[i][%d] = [%ld] => %d-->%f\n", i,
1092
+ score_docs[i], score_docs[i]->doc, score_docs[i]->score);
1093
+ */
1094
+ }
1095
+ }
1096
+ else {
1097
+ num_docs = 0;
1098
+ }
1099
+ pq_clear(hq);
1100
+ hq_destroy(hq);
1101
+
1102
+ return td_new(total_hits, num_docs, score_docs, max_score);
1103
+ }
1104
+
1105
+ static TopDocs *isea_search(Searcher *self,
1106
+ Query *query,
1107
+ int first_doc,
1108
+ int num_docs,
1109
+ Filter *filter,
1110
+ Sort *sort,
1111
+ filter_ft filter_func,
1112
+ bool load_fields)
1113
+ {
1114
+ TopDocs *td;
1115
+ Weight *weight = q_weight(query, self);
1116
+ td = isea_search_w(self, weight, first_doc, num_docs, filter,
1117
+ sort, filter_func, load_fields);
1118
+ weight->destroy(weight);
1119
+ return td;
1120
+ }
1121
+
1122
+ static void isea_search_each_w(Searcher *self, Weight *weight, Filter *filter,
1123
+ filter_ft filter_func,
1124
+ void (*fn)(Searcher *, int, float, void *),
1125
+ void *arg)
1126
+ {
1127
+ Scorer *scorer;
1128
+ BitVector *bits = (filter
1129
+ ? filt_get_bv(filter, ISEA(self)->ir)
1130
+ : NULL);
1131
+
1132
+ scorer = weight->scorer(weight, ISEA(self)->ir);
1133
+ if (!scorer) {
1134
+ return;
1135
+ }
1136
+
1137
+ while (scorer->next(scorer)) {
1138
+ if (IS_FILTERED(bits, filter_func, scorer, self)) {
1139
+ continue;
1140
+ }
1141
+ fn(self, scorer->doc, scorer->score(scorer), arg);
1142
+ }
1143
+ scorer->destroy(scorer);
1144
+ }
1145
+
1146
+ static void isea_search_each(Searcher *self, Query *query, Filter *filter,
1147
+ filter_ft filter_func,
1148
+ void (*fn)(Searcher *, int, float, void *),
1149
+ void *arg)
1150
+ {
1151
+ Weight *weight = q_weight(query, self);
1152
+ isea_search_each_w(self, weight, filter, filter_func, fn, arg);
1153
+ weight->destroy(weight);
1154
+ }
1155
+
1156
+ static Query *isea_rewrite(Searcher *self, Query *original)
1157
+ {
1158
+ int q_is_destroyed = false;
1159
+ Query *query = original;
1160
+ Query *rewritten_query = query->rewrite(query, ISEA(self)->ir);
1161
+ while (q_is_destroyed || (query != rewritten_query)) {
1162
+ query = rewritten_query;
1163
+ rewritten_query = query->rewrite(query, ISEA(self)->ir);
1164
+ q_is_destroyed = (query->ref_cnt <= 1);
1165
+ q_deref(query); /* destroy intermediate queries */
1166
+ }
1167
+ return query;
1168
+ }
1169
+
1170
+ static Explanation *isea_explain(Searcher *self, Query *query, int doc_num)
1171
+ {
1172
+ Weight *weight = q_weight(query, self);
1173
+ Explanation *e = weight->explain(weight, ISEA(self)->ir, doc_num);
1174
+ weight->destroy(weight);
1175
+ return e;
1176
+ }
1177
+
1178
+ static Explanation *isea_explain_w(Searcher *self, Weight *w, int doc_num)
1179
+ {
1180
+ return w->explain(w, ISEA(self)->ir, doc_num);
1181
+ }
1182
+
1183
+ static TermVector *isea_get_term_vector(Searcher *self,
1184
+ const int doc_num,
1185
+ const char *field)
1186
+ {
1187
+ IndexReader *ir = ISEA(self)->ir;
1188
+ return ir->term_vector(ir, doc_num, field);
1189
+ }
1190
+
1191
+ static void isea_close(Searcher *self)
1192
+ {
1193
+ if (ISEA(self)->ir && ISEA(self)->close_ir) {
1194
+ ir_close(ISEA(self)->ir);
1195
+ }
1196
+ free(self);
1197
+ }
1198
+
1199
+ Searcher *isea_new(IndexReader *ir)
1200
+ {
1201
+ Searcher *self = (Searcher *)ecalloc(sizeof(IndexSearcher));
1202
+
1203
+ ISEA(self)->ir = ir;
1204
+ ISEA(self)->close_ir = true;
1205
+
1206
+ self->similarity = sim_create_default();
1207
+ self->doc_freq = &isea_doc_freq;
1208
+ self->get_doc = &isea_get_doc;
1209
+ self->get_lazy_doc = &isea_get_lazy_doc;
1210
+ self->max_doc = &isea_max_doc;
1211
+ self->create_weight = &sea_create_weight;
1212
+ self->search = &isea_search;
1213
+ self->search_w = &isea_search_w;
1214
+ self->search_each = &isea_search_each;
1215
+ self->search_each_w = &isea_search_each_w;
1216
+ self->rewrite = &isea_rewrite;
1217
+ self->explain = &isea_explain;
1218
+ self->explain_w = &isea_explain_w;
1219
+ self->get_term_vector = &isea_get_term_vector;
1220
+ self->get_similarity = &sea_get_similarity;
1221
+ self->close = &isea_close;
1222
+
1223
+ return self;
1224
+ }
1225
+
1226
+ /***************************************************************************
1227
+ *
1228
+ * CachedDFSearcher
1229
+ *
1230
+ ***************************************************************************/
1231
+
1232
+ #define CDFSEA(searcher) ((CachedDFSearcher *)(searcher))
1233
+ typedef struct CachedDFSearcher
1234
+ {
1235
+ Searcher super;
1236
+ HashTable *df_map;
1237
+ int max_doc;
1238
+ } CachedDFSearcher;
1239
+
1240
+ static int cdfsea_doc_freq(Searcher *self, const char *field, const char *text)
1241
+ {
1242
+ Term term;
1243
+ int *df;
1244
+ term.field = (char *)field;
1245
+ term.text = (char *)text;
1246
+ df = (int *)h_get(CDFSEA(self)->df_map, &term);
1247
+ return df ? *df : 0;
1248
+ }
1249
+
1250
+ static Document *cdfsea_get_doc(Searcher *self, int doc_num)
1251
+ {
1252
+ (void)self; (void)doc_num;
1253
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1254
+ return NULL;
1255
+ }
1256
+
1257
+ static int cdfsea_max_doc(Searcher *self)
1258
+ {
1259
+ (void)self;
1260
+ return CDFSEA(self)->max_doc;
1261
+ }
1262
+
1263
+ static Weight *cdfsea_create_weight(Searcher *self, Query *query)
1264
+ {
1265
+ (void)self; (void)query;
1266
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1267
+ return NULL;
1268
+ }
1269
+
1270
+ static TopDocs *cdfsea_search_w(Searcher *self, Weight *w, int fd, int nd,
1271
+ Filter *f, Sort *s, filter_ft ff, bool load)
1272
+ {
1273
+ (void)self; (void)w; (void)fd; (void)nd;
1274
+ (void)f; (void)s; (void)ff; (void)load;
1275
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1276
+ return NULL;
1277
+ }
1278
+
1279
+ static TopDocs *cdfsea_search(Searcher *self, Query *q, int fd, int nd,
1280
+ Filter *f, Sort *s, filter_ft ff, bool load)
1281
+ {
1282
+ (void)self; (void)q; (void)fd; (void)nd;
1283
+ (void)f; (void)s; (void)ff; (void)load;
1284
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1285
+ return NULL;
1286
+ }
1287
+
1288
+ static void cdfsea_search_each(Searcher *self, Query *query, Filter *filter,
1289
+ filter_ft ff,
1290
+ void (*fn)(Searcher *, int, float, void *),
1291
+ void *arg)
1292
+ {
1293
+ (void)self; (void)query; (void)filter; (void)ff; (void)fn; (void)arg;
1294
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1295
+ }
1296
+
1297
+ static void cdfsea_search_each_w(Searcher *self, Weight *w, Filter *filter,
1298
+ filter_ft ff,
1299
+ void (*fn)(Searcher *, int, float, void *),
1300
+ void *arg)
1301
+ {
1302
+ (void)self; (void)w; (void)filter; (void)ff; (void)fn; (void)arg;
1303
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1304
+ }
1305
+
1306
+ static Query *cdfsea_rewrite(Searcher *self, Query *original)
1307
+ {
1308
+ (void)self;
1309
+ original->ref_cnt++;
1310
+ return original;
1311
+ }
1312
+
1313
+ static Explanation *cdfsea_explain(Searcher *self, Query *query, int doc_num)
1314
+ {
1315
+ (void)self; (void)query; (void)doc_num;
1316
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1317
+ return NULL;
1318
+ }
1319
+
1320
+ static Explanation *cdfsea_explain_w(Searcher *self, Weight *w, int doc_num)
1321
+ {
1322
+ (void)self; (void)w; (void)doc_num;
1323
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1324
+ return NULL;
1325
+ }
1326
+
1327
+ static TermVector *cdfsea_get_term_vector(Searcher *self, const int doc_num,
1328
+ const char *field)
1329
+ {
1330
+ (void)self; (void)doc_num; (void)field;
1331
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1332
+ return NULL;
1333
+ }
1334
+
1335
+ static Similarity *cdfsea_get_similarity(Searcher *self)
1336
+ {
1337
+ (void)self;
1338
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
1339
+ return NULL;
1340
+ }
1341
+
1342
+ static void cdfsea_close(Searcher *self)
1343
+ {
1344
+ h_destroy(CDFSEA(self)->df_map);
1345
+ free(self);
1346
+ }
1347
+
1348
+ static Searcher *cdfsea_new(HashTable *df_map, int max_doc)
1349
+ {
1350
+ Searcher *self = (Searcher *)ecalloc(sizeof(CachedDFSearcher));
1351
+
1352
+ CDFSEA(self)->df_map = df_map;
1353
+ CDFSEA(self)->max_doc = max_doc;
1354
+
1355
+ self->doc_freq = &cdfsea_doc_freq;
1356
+ self->get_doc = &cdfsea_get_doc;
1357
+ self->max_doc = &cdfsea_max_doc;
1358
+ self->create_weight = &cdfsea_create_weight;
1359
+ self->search = &cdfsea_search;
1360
+ self->search_w = &cdfsea_search_w;
1361
+ self->search_each = &cdfsea_search_each;
1362
+ self->search_each_w = &cdfsea_search_each_w;
1363
+ self->rewrite = &cdfsea_rewrite;
1364
+ self->explain = &cdfsea_explain;
1365
+ self->explain_w = &cdfsea_explain_w;
1366
+ self->get_term_vector = &cdfsea_get_term_vector;
1367
+ self->get_similarity = &cdfsea_get_similarity;
1368
+ self->close = &cdfsea_close;
1369
+ return self;
1370
+ }
1371
+
1372
+ /***************************************************************************
1373
+ *
1374
+ * MultiSearcher
1375
+ *
1376
+ ***************************************************************************/
1377
+
1378
+ #define MSEA(searcher) ((MultiSearcher *)(searcher))
1379
+ static INLINE int msea_get_searcher_index(Searcher *self, int n)
1380
+ {
1381
+ MultiSearcher *msea = MSEA(self);
1382
+ int lo = 0; /* search starts array */
1383
+ int hi = msea->s_cnt - 1; /* for 1st element < n, return its index */
1384
+ int mid, mid_val;
1385
+
1386
+ while (hi >= lo) {
1387
+ mid = (lo + hi) >> 1;
1388
+ mid_val = msea->starts[mid];
1389
+ if (n < mid_val) {
1390
+ hi = mid - 1;
1391
+ }
1392
+ else if (n > mid_val) {
1393
+ lo = mid + 1;
1394
+ }
1395
+ else { /* found a match */
1396
+ while (((mid+1) < msea->s_cnt)
1397
+ && (msea->starts[mid+1] == mid_val)) {
1398
+ mid++; /* scan to last match */
1399
+ }
1400
+ return mid;
1401
+ }
1402
+ }
1403
+ return hi;
1404
+ }
1405
+
1406
+ static int msea_doc_freq(Searcher *self, const char *field, const char *term)
1407
+ {
1408
+ int i;
1409
+ int doc_freq = 0;
1410
+ MultiSearcher *msea = MSEA(self);
1411
+ for (i = 0; i < msea->s_cnt; i++) {
1412
+ Searcher *s = msea->searchers[i];
1413
+ doc_freq += s->doc_freq(s, field, term);
1414
+ }
1415
+
1416
+ return doc_freq;
1417
+ }
1418
+
1419
+ static Document *msea_get_doc(Searcher *self, int doc_num)
1420
+ {
1421
+ MultiSearcher *msea = MSEA(self);
1422
+ int i = msea_get_searcher_index(self, doc_num);
1423
+ Searcher *s = msea->searchers[i];
1424
+ return s->get_doc(s, doc_num - msea->starts[i]);
1425
+ }
1426
+
1427
+ static LazyDoc *msea_get_lazy_doc(Searcher *self, int doc_num)
1428
+ {
1429
+ MultiSearcher *msea = MSEA(self);
1430
+ int i = msea_get_searcher_index(self, doc_num);
1431
+ Searcher *s = msea->searchers[i];
1432
+ return s->get_lazy_doc(s, doc_num - msea->starts[i]);
1433
+ }
1434
+
1435
+ static int msea_max_doc(Searcher *self)
1436
+ {
1437
+ return MSEA(self)->max_doc;
1438
+ }
1439
+
1440
+ static int *msea_get_doc_freqs(Searcher *self, HashSet *terms)
1441
+ {
1442
+ int i;
1443
+ const int num_terms = terms->size;
1444
+ int *doc_freqs = ALLOC_N(int, num_terms);
1445
+ for (i = 0; i < num_terms; i++) {
1446
+ Term *t = (Term *)terms->elems[i];
1447
+ doc_freqs[i] = msea_doc_freq(self, t->field, t->text);
1448
+ }
1449
+ return doc_freqs;
1450
+ }
1451
+
1452
+ static Weight *msea_create_weight(Searcher *self, Query *query)
1453
+ {
1454
+ int i, *doc_freqs;
1455
+ Searcher *cdfsea;
1456
+ Weight *w;
1457
+ HashTable *df_map = h_new((hash_ft)&term_hash, (eq_ft)&term_eq,
1458
+ (free_ft)NULL, free);
1459
+ Query *rewritten_query = self->rewrite(self, query);
1460
+ HashSet *terms = term_set_new();
1461
+
1462
+ rewritten_query->extract_terms(rewritten_query, terms);
1463
+ doc_freqs = msea_get_doc_freqs(self, terms);
1464
+
1465
+ for (i = 0; i < terms->size; i++) {
1466
+ h_set(df_map, terms->elems[i], imalloc(doc_freqs[i]));
1467
+ }
1468
+ hs_destroy(terms);
1469
+ free(doc_freqs);
1470
+
1471
+ cdfsea = cdfsea_new(df_map, MSEA(self)->max_doc);
1472
+
1473
+ w = q_weight(rewritten_query, cdfsea);
1474
+ q_deref(rewritten_query);
1475
+ cdfsea->close(cdfsea);
1476
+
1477
+ return w;
1478
+ }
1479
+
1480
+ struct MultiSearchEachArg {
1481
+ int start;
1482
+ void *arg;
1483
+ void (*fn)(Searcher *, int, float, void *);
1484
+ };
1485
+
1486
+ void msea_search_each_i(Searcher *self, int doc_num, float score, void *arg)
1487
+ {
1488
+ struct MultiSearchEachArg *mse_arg = (struct MultiSearchEachArg *)arg;
1489
+
1490
+ mse_arg->fn(self, doc_num + mse_arg->start, score, mse_arg->arg);
1491
+ }
1492
+
1493
+ static void msea_search_each_w(Searcher *self, Weight *w, Filter *filter,
1494
+ filter_ft filter_func,
1495
+ void (*fn)(Searcher *, int, float, void *),
1496
+ void *arg)
1497
+ {
1498
+ int i;
1499
+ struct MultiSearchEachArg mse_arg;
1500
+ MultiSearcher *msea = MSEA(self);
1501
+ Searcher *s;
1502
+
1503
+ mse_arg.fn = fn;
1504
+ mse_arg.arg = arg;
1505
+ for (i = 0; i < msea->s_cnt; i++) {
1506
+ s = msea->searchers[i];
1507
+ mse_arg.start = msea->starts[i];
1508
+ s->search_each_w(s, w, filter, filter_func,
1509
+ &msea_search_each_i, &mse_arg);
1510
+ }
1511
+ }
1512
+
1513
+ static void msea_search_each(Searcher *self, Query *query, Filter *filter,
1514
+ filter_ft filter_func,
1515
+ void (*fn)(Searcher *, int, float, void *), void *arg)
1516
+ {
1517
+ Weight *w = q_weight(query, self);
1518
+ msea_search_each_w(self, w, filter, filter_func, fn, arg);
1519
+ w->destroy(w);
1520
+ }
1521
+
1522
+ struct MultiSearchArg {
1523
+ int total_hits, max_size;
1524
+ PriorityQueue *hq;
1525
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
1526
+ };
1527
+
1528
+ void msea_search_i(Searcher *self, int doc_num, float score, void *arg)
1529
+ {
1530
+ struct MultiSearchArg *ms_arg = (struct MultiSearchArg *)arg;
1531
+ Hit hit;
1532
+ (void)self;
1533
+
1534
+ ms_arg->total_hits++;
1535
+ hit.doc = doc_num;
1536
+ hit.score = score;
1537
+ ms_arg->hq_insert(ms_arg->hq, &hit);
1538
+ }
1539
+
1540
+ static TopDocs *msea_search_w(Searcher *self,
1541
+ Weight *weight,
1542
+ int first_doc,
1543
+ int num_docs,
1544
+ Filter *filter,
1545
+ Sort *sort,
1546
+ filter_ft filter_func,
1547
+ bool load_fields)
1548
+ {
1549
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1550
+ int i;
1551
+ int total_hits = 0;
1552
+ Hit **score_docs = NULL;
1553
+ Hit *(*hq_pop)(PriorityQueue *pq);
1554
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
1555
+ PriorityQueue *hq;
1556
+ float max_score = 0.0;
1557
+ (void)load_fields; /* does it automatically */
1558
+
1559
+ sea_check_args(num_docs, first_doc);
1560
+
1561
+ if (sort) {
1562
+ hq = pq_new(max_size, (lt_ft)fdshq_lt, &free);
1563
+ hq_insert = (void (*)(PriorityQueue *pq, Hit *hit))&pq_insert;
1564
+ hq_pop = (Hit *(*)(PriorityQueue *pq))&pq_pop;
1565
+ }
1566
+ else {
1567
+ hq = pq_new(max_size, (lt_ft)&hit_less_than, &free);
1568
+ hq_insert = &hit_pq_multi_insert;
1569
+ hq_pop = &hit_pq_pop;
1570
+ }
1571
+
1572
+ /*if (sort) printf("sort = %s\n", sort_to_s(sort)); */
1573
+ for (i = 0; i < MSEA(self)->s_cnt; i++) {
1574
+ Searcher *s = MSEA(self)->searchers[i];
1575
+ TopDocs *td = s->search_w(s, weight, 0, max_size,
1576
+ filter, sort, filter_func, true);
1577
+ /*if (sort) printf("sort = %s\n", sort_to_s(sort)); */
1578
+ if (td->size > 0) {
1579
+ /*printf("td->size = %d %d\n", td->size, num_docs); */
1580
+ int j;
1581
+ int start = MSEA(self)->starts[i];
1582
+ for (j = 0; j < td->size; j++) {
1583
+ Hit *hit = td->hits[j];
1584
+ hit->doc += start;
1585
+ /*
1586
+ printf("adding hit = %d:%f\n", hit->doc, hit->score);
1587
+ */
1588
+ hq_insert(hq, hit);
1589
+ }
1590
+ td->size = 0;
1591
+ if (td->max_score > max_score) max_score = td->max_score;
1592
+ }
1593
+ total_hits += td->total_hits;
1594
+ td_destroy(td);
1595
+ }
1596
+
1597
+ if (hq->size > first_doc) {
1598
+ if ((hq->size - first_doc) < num_docs) {
1599
+ num_docs = hq->size - first_doc;
1600
+ }
1601
+ score_docs = ALLOC_N(Hit *, num_docs);
1602
+ for (i = num_docs - 1; i >= 0; i--) {
1603
+ score_docs[i] = hq_pop(hq);
1604
+ /*
1605
+ Hit *hit = score_docs[i] = hq_pop(hq);
1606
+ printf("popped hit = %d-->%f\n", hit->doc, hit->score);
1607
+ */
1608
+ }
1609
+ }
1610
+ else {
1611
+ num_docs = 0;
1612
+ }
1613
+ pq_clear(hq);
1614
+ pq_destroy(hq);
1615
+
1616
+ return td_new(total_hits, num_docs, score_docs, max_score);
1617
+ }
1618
+
1619
+ static TopDocs *msea_search(Searcher *self,
1620
+ Query *query,
1621
+ int first_doc,
1622
+ int num_docs,
1623
+ Filter *filter,
1624
+ Sort *sort,
1625
+ filter_ft filter_func,
1626
+ bool load_fields)
1627
+ {
1628
+ TopDocs *td;
1629
+ Weight *weight = q_weight(query, self);
1630
+ td = msea_search_w(self, weight, first_doc, num_docs, filter,
1631
+ sort, filter_func, load_fields);
1632
+ weight->destroy(weight);
1633
+ return td;
1634
+ }
1635
+
1636
+ static Query *msea_rewrite(Searcher *self, Query *original)
1637
+ {
1638
+ int i;
1639
+ Searcher *s;
1640
+ MultiSearcher *msea = MSEA(self);
1641
+ Query **queries = ALLOC_N(Query *, msea->s_cnt), *rewritten;
1642
+
1643
+ for (i = 0; i < msea->s_cnt; i++) {
1644
+ s = msea->searchers[i];
1645
+ queries[i] = s->rewrite(s, original);
1646
+ }
1647
+ rewritten = q_combine(queries, msea->s_cnt);
1648
+
1649
+ for (i = 0; i < msea->s_cnt; i++) {
1650
+ q_deref(queries[i]);
1651
+ }
1652
+ free(queries);
1653
+ return rewritten;
1654
+ }
1655
+
1656
+ static Explanation *msea_explain(Searcher *self, Query *query, int doc_num)
1657
+ {
1658
+ MultiSearcher *msea = MSEA(self);
1659
+ int i = msea_get_searcher_index(self, doc_num);
1660
+ Weight *w = q_weight(query, self);
1661
+ Searcher *s = msea->searchers[i];
1662
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
1663
+ w->destroy(w);
1664
+ return e;
1665
+ }
1666
+
1667
+ static Explanation *msea_explain_w(Searcher *self, Weight *w, int doc_num)
1668
+ {
1669
+ MultiSearcher *msea = MSEA(self);
1670
+ int i = msea_get_searcher_index(self, doc_num);
1671
+ Searcher *s = msea->searchers[i];
1672
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
1673
+ return e;
1674
+ }
1675
+
1676
+ static TermVector *msea_get_term_vector(Searcher *self, const int doc_num,
1677
+ const char *field)
1678
+ {
1679
+ MultiSearcher *msea = MSEA(self);
1680
+ int i = msea_get_searcher_index(self, doc_num);
1681
+ Searcher *s = msea->searchers[i];
1682
+ return s->get_term_vector(s, doc_num - msea->starts[i],
1683
+ field);
1684
+ }
1685
+
1686
+ static Similarity *msea_get_similarity(Searcher *self)
1687
+ {
1688
+ return self->similarity;
1689
+ }
1690
+
1691
+ static void msea_close(Searcher *self)
1692
+ {
1693
+ int i;
1694
+ Searcher *s;
1695
+ MultiSearcher *msea = MSEA(self);
1696
+ if (msea->close_subs) {
1697
+ for (i = 0; i < msea->s_cnt; i++) {
1698
+ s = msea->searchers[i];
1699
+ s->close(s);
1700
+ }
1701
+ }
1702
+ free(msea->searchers);
1703
+ free(msea->starts);
1704
+ free(self);
1705
+ }
1706
+
1707
+ Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs)
1708
+ {
1709
+ int i, max_doc = 0;
1710
+ Searcher *self = (Searcher *)ecalloc(sizeof(MultiSearcher));
1711
+ int *starts = ALLOC_N(int, s_cnt + 1);
1712
+ for (i = 0; i < s_cnt; i++) {
1713
+ starts[i] = max_doc;
1714
+ max_doc += searchers[i]->max_doc(searchers[i]);
1715
+ }
1716
+ starts[i] = max_doc;
1717
+
1718
+ MSEA(self)->s_cnt = s_cnt;
1719
+ MSEA(self)->searchers = searchers;
1720
+ MSEA(self)->starts = starts;
1721
+ MSEA(self)->max_doc = max_doc;
1722
+ MSEA(self)->close_subs = close_subs;
1723
+
1724
+ self->similarity = sim_create_default();
1725
+ self->doc_freq = &msea_doc_freq;
1726
+ self->get_doc = &msea_get_doc;
1727
+ self->get_lazy_doc = &msea_get_lazy_doc;
1728
+ self->max_doc = &msea_max_doc;
1729
+ self->create_weight = &msea_create_weight;
1730
+ self->search = &msea_search;
1731
+ self->search_w = &msea_search_w;
1732
+ self->search_each = &msea_search_each;
1733
+ self->search_each_w = &msea_search_each_w;
1734
+ self->rewrite = &msea_rewrite;
1735
+ self->explain = &msea_explain;
1736
+ self->explain_w = &msea_explain_w;
1737
+ self->get_term_vector = &msea_get_term_vector;
1738
+ self->get_similarity = &msea_get_similarity;
1739
+ self->close = &msea_close;
1740
+ return self;
1741
+ }