ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/r_store.c CHANGED
@@ -6,6 +6,10 @@ VALUE cDirectory;
6
6
  VALUE cRAMDirectory;
7
7
  VALUE cFSDirectory;
8
8
 
9
+
10
+ static ID id_mkdir_p;
11
+ static ID id_is_directory;
12
+
9
13
  /****************************************************************************
10
14
  *
11
15
  * Lock Methods
@@ -87,11 +91,10 @@ frt_lock_release(VALUE self)
87
91
  ****************************************************************************/
88
92
 
89
93
  void
90
- frt_dir_free(void *p)
94
+ frt_dir_free(Store *store)
91
95
  {
92
- Store *store = (Store *)p;
93
- object_del(p);
94
- store->close(store);
96
+ object_del(store);
97
+ store_deref(store);
95
98
  }
96
99
 
97
100
  #define GET_STORE Store *store; Data_Get_Struct(self, Store, store)
@@ -99,10 +102,11 @@ static VALUE
99
102
  frt_dir_close(VALUE self)
100
103
  {
101
104
  /*
105
+ * No need to do anything here. Leave it do the garbage collector
102
106
  GET_STORE;
103
107
  Frt_Unwrap_Struct(self);
104
108
  object_del(store);
105
- store->close(store);
109
+ store_deref(store);
106
110
  */
107
111
  return Qnil;
108
112
  }
@@ -212,11 +216,23 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
212
216
  Store *store;
213
217
  bool create = RTEST(rcreate);
214
218
  rpath = rb_obj_as_string(rpath);
219
+ if (create) {
220
+ VALUE mFileUtils;
221
+ rb_require("fileutils");
222
+ mFileUtils = rb_define_module("FileUtils");
223
+ rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
224
+ }
225
+ if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
226
+ rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
227
+ "create one.", RSTRING(rpath)->ptr);
228
+ }
215
229
  store = open_fs_store(RSTRING(rpath)->ptr);
216
230
  if (create) store->clear_all(store);
217
231
  if ((self = object_get(store)) == Qnil) {
218
232
  self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
219
233
  object_add(store, self);
234
+ } else {
235
+ store_deref(store);
220
236
  }
221
237
  return self;
222
238
  }
@@ -227,19 +243,12 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
227
243
  *
228
244
  ****************************************************************************/
229
245
 
230
- #define DIR_METHODS(dir)\
231
- rb_define_method(dir, "close", frt_dir_close, 0);\
232
- rb_define_method(dir, "exists?", frt_dir_exists, 1);\
233
- rb_define_method(dir, "touch", frt_dir_touch, 1);\
234
- rb_define_method(dir, "delete", frt_dir_delete, 1);\
235
- rb_define_method(dir, "file_count", frt_dir_file_count, 0);\
236
- rb_define_method(dir, "refresh", frt_dir_refresh, 0);\
237
- rb_define_method(dir, "rename", frt_dir_rename, 2);\
238
- rb_define_method(dir, "make_lock", frt_dir_make_lock, 1);
239
-
240
246
  void
241
247
  Init_dir(void)
242
248
  {
249
+ id_mkdir_p = rb_intern("mkdir_p");
250
+ id_is_directory = rb_intern("directory?");
251
+
243
252
  cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
244
253
  rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
245
254
  rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
@@ -248,16 +257,22 @@ Init_dir(void)
248
257
 
249
258
  cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
250
259
  rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
260
+ rb_define_method(cDirectory, "close", frt_dir_close, 0);\
261
+ rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);\
262
+ rb_define_method(cDirectory, "touch", frt_dir_touch, 1);\
263
+ rb_define_method(cDirectory, "delete", frt_dir_delete, 1);\
264
+ rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);\
265
+ rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);\
266
+ rb_define_method(cDirectory, "rename", frt_dir_rename, 2);\
267
+ rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
251
268
 
252
269
  /* RAMDirectory */
253
270
  cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
254
271
  rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
255
272
  rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
256
- DIR_METHODS(cRAMDirectory);
257
273
 
258
274
  /* FSDirectory */
259
275
  cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
260
276
  rb_define_alloc_func(cFSDirectory, frt_data_alloc);
261
277
  rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, 2);
262
- DIR_METHODS(cFSDirectory);
263
278
  }
data/ext/ram_store.c CHANGED
@@ -4,12 +4,14 @@
4
4
  static char * const RENAME_ERROR_MSG = "tried to rename a file that doesn't exist";
5
5
  static char * const MISSING_RAMFILE_ERROR_MSG ="Couldn't open the ram file to read";
6
6
 
7
+ extern void store_destroy(Store *store);
8
+
7
9
  typedef struct RamFile {
8
10
  char *name;
9
11
  uchar **buffers;
10
12
  int bufcnt;
11
13
  int len;
12
- int refcnt;
14
+ int ref_cnt;
13
15
  bool alive;
14
16
  } RamFile;
15
17
 
@@ -21,7 +23,7 @@ RamFile *rf_create(const char *name)
21
23
  rf->name = estrdup(name);
22
24
  rf->len = 0;
23
25
  rf->bufcnt = 1;
24
- rf->refcnt = 0;
26
+ rf->ref_cnt = 0;
25
27
  rf->alive = true;
26
28
  return rf;
27
29
  }
@@ -38,7 +40,7 @@ void rf_close(void *p)
38
40
  {
39
41
  int i;
40
42
  RamFile *rf = (RamFile *)p;
41
- if (rf->refcnt > 0 || rf->alive) return;
43
+ if (rf->ref_cnt > 0 || rf->alive) return;
42
44
  free(rf->name);
43
45
  for (i = 0; i < rf->bufcnt; i++) {
44
46
  free(rf->buffers[i]);
@@ -76,17 +78,21 @@ int ram_remove(Store *store, char *filename)
76
78
  int ram_rename(Store *store, char *from, char *to)
77
79
  {
78
80
  RamFile *rf = (RamFile *)h_rem(store->dir.ht, from, false);
79
- if (rf == NULL)
81
+ RamFile *tmp;
82
+
83
+ if (rf == NULL) {
80
84
  RAISE(IO_ERROR, RENAME_ERROR_MSG);
85
+ }
81
86
 
82
87
  free(rf->name);
83
88
 
84
89
  rf->name = estrdup(to);
85
90
 
86
- // clean up the file we are overwriting
87
- RamFile *tmp = (RamFile *)h_get(store->dir.ht, to);
88
- if (tmp != NULL)
91
+ /* clean up the file we are overwriting */
92
+ tmp = (RamFile *)h_get(store->dir.ht, to);
93
+ if (tmp != NULL) {
89
94
  tmp->alive = false;
95
+ }
90
96
 
91
97
  h_set(store->dir.ht, rf->name, rf);
92
98
  return true;
@@ -112,7 +118,7 @@ void ram_each(Store *store, void (*func)(char *fname, void *arg), void *arg)
112
118
  }
113
119
  }
114
120
 
115
- void ram_close(Store *store)
121
+ void ram_close_i(Store *store)
116
122
  {
117
123
  HshTable *ht = store->dir.ht;
118
124
  RamFile *rf;
@@ -185,6 +191,7 @@ int ramo_length(OutStream *os)
185
191
 
186
192
  void ramo_flush_internal(OutStream *os, uchar *src, int len)
187
193
  {
194
+ uchar *buffer;
188
195
  RamFile *rf = (RamFile *)os->file;
189
196
  int buffer_number, buffer_offset, bytes_in_buffer, bytes_to_copy;
190
197
  int src_offset;
@@ -197,7 +204,7 @@ void ramo_flush_internal(OutStream *os, uchar *src, int len)
197
204
 
198
205
  rf_extend_if_necessary(rf, buffer_number);
199
206
 
200
- uchar *buffer = rf->buffers[buffer_number];
207
+ buffer = rf->buffers[buffer_number];
201
208
  memcpy(buffer + buffer_offset, src, bytes_to_copy);
202
209
 
203
210
  if (bytes_to_copy < len) {
@@ -230,7 +237,7 @@ void ramo_reset(OutStream *os)
230
237
  void ramo_close_internal(OutStream *os)
231
238
  {
232
239
  RamFile *rf = (RamFile *)os->file;
233
- rf->refcnt--;
240
+ rf->ref_cnt--;
234
241
  rf_close(rf);
235
242
  }
236
243
 
@@ -238,9 +245,12 @@ void ramo_write_to(OutStream *os, OutStream *other_o)
238
245
  {
239
246
  int i, len;
240
247
  RamFile *rf = (RamFile *)os->file;
248
+ int last_buffer_number;
249
+ int last_buffer_offset;
250
+
241
251
  os_flush(os);
242
- int last_buffer_number = (int)(rf->len / BUFFER_SIZE);
243
- int last_buffer_offset = rf->len % BUFFER_SIZE;
252
+ last_buffer_number = (int)(rf->len / BUFFER_SIZE);
253
+ last_buffer_offset = rf->len % BUFFER_SIZE;
244
254
  for (i = 0; i <= last_buffer_number; i++) {
245
255
  len = (i == last_buffer_number ? last_buffer_offset : BUFFER_SIZE);
246
256
  os_write_bytes(other_o, rf->buffers[i], len);
@@ -250,8 +260,9 @@ void ramo_write_to(OutStream *os, OutStream *other_o)
250
260
  OutStream *ram_create_buffer()
251
261
  {
252
262
  RamFile *rf = rf_create("");
253
- rf->alive = false;
254
263
  OutStream *os = os_create();
264
+
265
+ rf->alive = false;
255
266
  os->file = rf;
256
267
  os->pointer = 0;
257
268
  os->flush_internal = &ramo_flush_internal;
@@ -269,12 +280,13 @@ void ram_destroy_buffer(OutStream *os)
269
280
  OutStream *ram_create_output(Store *store, const char *filename)
270
281
  {
271
282
  RamFile *rf = (RamFile *)h_get(store->dir.ht, filename);
283
+ OutStream *os = os_create();
284
+
272
285
  if (rf == NULL) {
273
286
  rf = rf_create(filename);
274
287
  h_set(store->dir.ht, rf->name, rf);
275
288
  }
276
- rf->refcnt++;
277
- OutStream *os = os_create();
289
+ rf->ref_cnt++;
278
290
  os->pointer = 0;
279
291
  os->file = rf;
280
292
  os->flush_internal = &ramo_flush_internal;
@@ -325,23 +337,24 @@ void rami_seek_internal(InStream *is, int pos)
325
337
  void rami_close_internal(InStream *is)
326
338
  {
327
339
  RamFile *rf = (RamFile *)is->file;
328
- rf->refcnt--;
340
+ rf->ref_cnt--;
329
341
  rf_close(rf);
330
342
  }
331
343
 
332
344
  void rami_clone_internal(InStream *is, InStream *new_index_i)
333
345
  {
334
- ((RamFile *)is->file)->refcnt++;
346
+ ((RamFile *)is->file)->ref_cnt++;
335
347
  }
336
348
 
337
349
  InStream *ram_open_input(Store *store, const char *filename)
338
350
  {
339
351
  RamFile *rf = (RamFile *)h_get(store->dir.ht, filename);
352
+ InStream *is = is_create();
353
+
340
354
  if (rf == NULL) {
341
355
  RAISE(IO_ERROR, MISSING_RAMFILE_ERROR_MSG);
342
356
  }
343
- rf->refcnt++;
344
- InStream *is = is_create();
357
+ rf->ref_cnt++;
345
358
  is->file = rf;
346
359
  is->d.pointer = 0;
347
360
  is->is_clone = false;
@@ -404,7 +417,6 @@ Store *open_ram_store()
404
417
  new_store->remove = &ram_remove;
405
418
  new_store->rename = &ram_rename;
406
419
  new_store->count = &ram_count;
407
- new_store->close = &ram_close;
408
420
  new_store->clear = &ram_clear;
409
421
  new_store->clear_all = &ram_clear_all;
410
422
  new_store->clear_locks = &ram_clear_locks;
@@ -414,6 +426,7 @@ Store *open_ram_store()
414
426
  new_store->open_input = &ram_open_input;
415
427
  new_store->open_lock = &ram_open_lock;
416
428
  new_store->close_lock = &ram_close_lock;
429
+ new_store->close_i = &ram_close_i;
417
430
  return new_store;
418
431
  }
419
432
 
@@ -427,11 +440,14 @@ static void copy_files(char *fname, void *arg)
427
440
  OutStream *os = cfa->to_store->create_output(cfa->to_store, fname);
428
441
  InStream *is = cfa->from_store->open_input(cfa->from_store, fname);
429
442
  int len = is_length(is);
430
- uchar buffer[len+1];
443
+ uchar *buffer = ALLOC_N(uchar, len+1);
444
+
431
445
  is_read_bytes(is, buffer, 0, len);
432
446
  os_write_bytes(os, buffer, len);
447
+
433
448
  is_close(is);
434
449
  os_close(os);
450
+ free(buffer);
435
451
  }
436
452
 
437
453
  Store *open_ram_store_and_copy(Store *from_store, bool close_dir)
@@ -444,7 +460,7 @@ Store *open_ram_store_and_copy(Store *from_store, bool close_dir)
444
460
  from_store->each(from_store, &copy_files, &cfa);
445
461
 
446
462
  if (close_dir)
447
- from_store->close(from_store);
463
+ store_deref(from_store);
448
464
 
449
465
  return store;
450
466
  }
data/ext/search.c CHANGED
@@ -135,30 +135,34 @@ Hit *hit_pq_pop(PriorityQueue *pq)
135
135
 
136
136
  inline void hit_pq_up(PriorityQueue *pq)
137
137
  {
138
- int i,j;
139
- i = pq->count;
140
- j = i >> 1;
141
138
  Hit **heap = (Hit **)pq->heap;
142
- Hit *node = heap[i];
139
+ Hit *node;
140
+ int i = pq->count;
141
+ int j = i >> 1;
142
+ node = heap[i];
143
143
 
144
- while ((j > 0) && hit_lt(node, heap[j])) {
145
- heap[i] = heap[j];
146
- i = j;
147
- j = j >> 1;
148
- }
144
+ while ((j > 0) && hit_lt(node, heap[j])) {
145
+ heap[i] = heap[j];
146
+ i = j;
147
+ j = j >> 1;
148
+ }
149
149
  heap[i] = node;
150
150
  }
151
151
 
152
-
153
- void hit_pq_push(PriorityQueue *pq, void *elem)
152
+ void hit_pq_insert(PriorityQueue *pq, Hit *hit)
154
153
  {
155
- pq->count++;
156
- pq->heap[pq->count] = elem;
157
- hit_pq_up(pq);
154
+ if (pq->count < pq->size) {
155
+ Hit *new_hit = ALLOC(Hit);
156
+ memcpy(new_hit, hit, sizeof(Hit));
157
+ pq->count++;
158
+ pq->heap[pq->count] = new_hit;
159
+ hit_pq_up(pq);
160
+ } else if (pq->count > 0 && hit_lt((Hit *)pq->heap[1], hit)) {
161
+ memcpy(pq->heap[1], hit, sizeof(Hit));
162
+ hit_pq_down(pq);
163
+ }
158
164
  }
159
165
 
160
-
161
-
162
166
  /***************************************************************************
163
167
  *
164
168
  * TopDocs
@@ -174,9 +178,8 @@ TopDocs *td_create(int total_hits, int size, Hit **hits)
174
178
  return td;
175
179
  }
176
180
 
177
- void td_destroy(void *p)
181
+ void td_destroy(TopDocs *td)
178
182
  {
179
- TopDocs *td = (TopDocs *)p;
180
183
  int i;
181
184
  for (i = 0; i < td->size; i++) {
182
185
  free(td->hits[i]);
@@ -226,54 +229,78 @@ void w_normalize(Weight *self, float normalization_factor)
226
229
  self->value = self->qweight * self->idf; // idf for document
227
230
  }
228
231
 
232
+ void w_destroy(Weight *self)
233
+ {
234
+ q_deref(self->query);
235
+ free(self);
236
+ }
237
+
238
+ Weight *w_create(Query *query)
239
+ {
240
+ Weight *self = ALLOC_AND_ZERO_N(Weight, 1);
241
+ ref(query);
242
+ self->query = query;
243
+
244
+ self->get_query = &w_get_query;
245
+ self->get_value = &w_get_value;
246
+ self->normalize = &w_normalize;
247
+ self->destroy = &w_destroy;
248
+ return self;
249
+ }
250
+
229
251
  /***************************************************************************
230
252
  *
231
253
  * Query
232
254
  *
233
255
  ***************************************************************************/
234
256
 
235
- Similarity *q_get_similarity(Query *self, Searcher *searcher)
257
+ Similarity *q_get_similarity_i(Query *self, Searcher *searcher)
236
258
  {
237
259
  return searcher->get_similarity(searcher);
238
260
  }
239
261
 
240
262
  Query *q_rewrite(Query *self, IndexReader *ir)
241
263
  {
264
+ self->ref_cnt++;
242
265
  return self;
243
266
  }
244
267
 
245
268
  Weight *q_weight(Query *self, Searcher *searcher)
246
269
  {
247
- if (self->weight) {
248
- self->weight->destroy(self->weight);
249
- }
250
270
  Query *query = searcher->rewrite(searcher, self);
251
- Weight *weight = query->create_weight(query, searcher);
271
+ Weight *weight = query->create_weight_i(query, searcher);
252
272
  float sum = weight->sum_of_squared_weights(weight);
253
273
  Similarity *sim = query->get_similarity(query, searcher);
254
274
  float norm = sim_query_norm(sim, sum);
275
+ q_deref(query);
255
276
 
256
277
  weight->normalize(weight, norm);
257
278
  return self->weight = weight;
258
279
  }
259
280
 
260
- void q_destroy(Query *self)
281
+ Weight *q_create_weight_unsup(Query *self, Searcher *searcher)
282
+ {
283
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
284
+ return NULL;
285
+ }
286
+
287
+ void q_destroy_i(Query *self)
261
288
  {
262
- if (self->rewritten) {
263
- self->rewritten->destroy(self->rewritten);
264
- self->rewritten = NULL;
265
- }
266
- if (self->weight) {
267
- self->weight->destroy(self->weight);
268
- }
269
289
  free(self);
270
290
  }
271
291
 
272
- void q_extract_terms(Query *self, Array *terms)
292
+ void q_extract_terms(Query *self, HashSet *terms)
273
293
  {
274
294
  /* do nothing by default */
275
295
  }
276
296
 
297
+ void q_deref(Query *self)
298
+ {
299
+ if (--self->ref_cnt == 0) {
300
+ self->destroy_i(self);
301
+ }
302
+ }
303
+
277
304
  Query *q_create()
278
305
  {
279
306
  Query *self = ALLOC(Query);
@@ -281,30 +308,92 @@ Query *q_create()
281
308
  self->destroy_all = true;
282
309
  self->boost = 1.0;
283
310
  self->rewrite = &q_rewrite;
284
- self->get_similarity = &q_get_similarity;
311
+ self->get_similarity = &q_get_similarity_i;
285
312
  self->extract_terms = &q_extract_terms;
286
313
  self->weight = NULL;
287
- self->rewritten = NULL;
314
+ self->ref_cnt = 1;
288
315
  return self;
289
316
  }
290
317
 
318
+ uint q_hash(Query *self)
319
+ {
320
+ return (self->hash(self) << 4) | self->type;
321
+ }
322
+
323
+ int q_eq(Query *self, Query *o)
324
+ {
325
+ return (self == o) || ((self->type == o->type) &&
326
+ (self->boost == o->boost) &&
327
+ self->eq(self, o));
328
+ }
329
+
330
+ Query *q_combine(Query **queries, int q_cnt)
331
+ {
332
+ int i;
333
+ Query *q, *ret_q;
334
+ HashSet *uniques =
335
+ hs_create((hash_ft)&q_hash, (eq_ft)&q_eq, NULL);
336
+
337
+ for (i = 0; i < q_cnt; i++) {
338
+ q = queries[i];
339
+ if (q->type == BOOLEAN_QUERY) {
340
+ int j;
341
+ bool splittable = true;
342
+ BooleanQuery *bq = (BooleanQuery *)q->data;
343
+ if (bq->coord_disabled == false) {
344
+ splittable = false;
345
+ } else {
346
+ for (j = 0; j < bq->clause_cnt; j++) {
347
+ if (bq->clauses[j]->occur != BC_SHOULD) {
348
+ splittable = false;
349
+ break;
350
+ }
351
+ }
352
+ }
353
+ if (splittable) {
354
+ for (j = 0; j < bq->clause_cnt; j++) {
355
+ q = bq->clauses[j]->query;
356
+ hs_add(uniques, q);
357
+ }
358
+ } else {
359
+ hs_add(uniques, q);
360
+ }
361
+ } else {
362
+ hs_add(uniques, q);
363
+ }
364
+ }
365
+ if (uniques->size == 1) {
366
+ ret_q = (Query *)uniques->elems[0];
367
+ ref(ret_q);
368
+ } else {
369
+ ret_q = bq_create(true);
370
+ for (i = 0; i < uniques->size; i++) {
371
+ q = (Query *)uniques->elems[i];
372
+ ref(q);
373
+ bq_add_query(ret_q, q, BC_SHOULD);
374
+ }
375
+ }
376
+ hs_destroy(uniques);
377
+
378
+ return ret_q;
379
+ }
380
+
291
381
  /***************************************************************************
292
382
  *
293
383
  * Scorer
294
384
  *
295
385
  ***************************************************************************/
296
386
 
297
- void scorer_destroy(void *p)
387
+ void scorer_destroy_i(Scorer *self)
298
388
  {
299
- Scorer *scorer = (Scorer *)p;
300
- free(scorer->data);
301
- free(scorer);
389
+ free(self->data);
390
+ free(self);
302
391
  }
303
392
 
304
393
  Scorer *scorer_create(Similarity *similarity)
305
394
  {
306
395
  Scorer *self = ALLOC(Scorer);
307
- self->destroy = &scorer_destroy;
396
+ self->destroy = &scorer_destroy_i;
308
397
  self->data = NULL;
309
398
  self->similarity = similarity;
310
399
  return self;
@@ -326,43 +415,58 @@ int scorer_doc_cmp(const void *p1, const void *p2)
326
415
  {
327
416
  return (*(Scorer **)p1)->doc - (*(Scorer **)p2)->doc;
328
417
  }
418
+
329
419
  /***************************************************************************
330
420
  *
331
421
  * Searcher
332
422
  *
333
423
  ***************************************************************************/
334
424
 
335
- int sea_doc_freq(Searcher *self, Term *term)
425
+ static int s_doc_freq(Searcher *self, Term *term)
336
426
  {
337
427
  return self->ir->doc_freq(self->ir, term);
338
428
  }
339
429
 
340
- int *sea_doc_freqs(Searcher *self, Term **terms, int tcnt)
430
+ static int *s_doc_freqs(Searcher *self, Term **terms, int tcnt)
341
431
  {
342
- int *freqs = ALLOC_N(int, tcnt);
343
432
  int i;
433
+ int *freqs = ALLOC_N(int, tcnt);
434
+
344
435
  for (i = 0; i < tcnt; i++) {
345
436
  freqs[i] = self->ir->doc_freq(self->ir, terms[i]);
346
437
  }
347
438
  return freqs;
348
439
  }
349
440
 
350
- Document *sea_get_doc(Searcher *self, int doc_num)
441
+ static int *ss_doc_freqs(Searcher *self, Term **terms, int tcnt)
442
+ {
443
+ int i;
444
+ int *freqs = ALLOC_N(int, tcnt);
445
+
446
+ for (i = 0; i < tcnt; i++) {
447
+ freqs[i] = self->doc_freq(self, terms[i]);
448
+ }
449
+
450
+ return freqs;
451
+ }
452
+
453
+
454
+ static Document *s_get_doc(Searcher *self, int doc_num)
351
455
  {
352
456
  return self->ir->get_doc(self->ir, doc_num);
353
457
  }
354
458
 
355
- int sea_max_doc(Searcher *self)
459
+ static int s_max_doc(Searcher *self)
356
460
  {
357
461
  return self->ir->max_doc(self->ir);
358
462
  }
359
463
 
360
- Weight *sea_create_weight(Searcher *self, Query *query)
464
+ static Weight *s_create_weight(Searcher *self, Query *query)
361
465
  {
362
466
  return q_weight(query, self);
363
467
  }
364
468
 
365
- TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
469
+ static TopDocs *s_search(Searcher *self, Query *query, int first_doc,
366
470
  int num_docs, Filter *filter, Sort *sort)
367
471
  {
368
472
  int max_size = first_doc + num_docs;
@@ -370,14 +474,13 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
370
474
  Weight *weight;
371
475
  Scorer *scorer;
372
476
  Hit **score_docs = NULL;
373
- Hit *hit;
477
+ Hit hit;
374
478
  int total_hits = 0;
375
- float min_score = 0.0, score;
479
+ float score;
376
480
  BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
377
481
  Hit *(*hq_pop)(PriorityQueue *pq);
378
- void (*hq_down)(PriorityQueue *pq);
379
- void (*hq_push)(PriorityQueue *pq, void *elem);
380
- void (*hq_destroy)(void *p);
482
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
483
+ void (*hq_destroy)(PriorityQueue *self);
381
484
  PriorityQueue *hq;
382
485
 
383
486
 
@@ -391,20 +494,19 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
391
494
  scorer = weight->scorer(weight, self->ir);
392
495
  if (!scorer) {
393
496
  if (bits) bv_destroy(bits);
497
+ weight->destroy(weight);
394
498
  return td_create(0, 0, NULL);
395
499
  }
396
500
 
397
501
  if (sort) {
398
502
  hq = fshq_pq_create(max_size, sort, self->ir);
399
503
  hq_pop = &fshq_pq_pop;
400
- hq_down = &fshq_pq_down;
401
- hq_push = &fshq_pq_push;
504
+ hq_insert = &fshq_pq_insert;
402
505
  hq_destroy = &fshq_pq_destroy;
403
506
  } else {
404
507
  hq = pq_create(max_size, &hit_less_than);
405
508
  hq_pop = &hit_pq_pop;
406
- hq_down = &hit_pq_down;
407
- hq_push = &hit_pq_push;
509
+ hq_insert = &hit_pq_insert;
408
510
  hq_destroy = &pq_destroy;
409
511
  }
410
512
 
@@ -412,19 +514,11 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
412
514
  if (bits && !bv_get(bits, scorer->doc)) continue;
413
515
  total_hits++;
414
516
  score = scorer->score(scorer);
415
- if (hq->count < max_size) {
416
- hit = ALLOC(Hit);
417
- hit->doc = scorer->doc; hit->score = score;
418
- hq_push(hq, hit);
419
- min_score = ((Hit *)pq_top(hq))->score; // maintain min_score
420
- } else if (score > min_score) {
421
- hit = pq_top(hq);
422
- hit->doc = scorer->doc; hit->score = score;
423
- hq_down(hq);
424
- min_score = ((Hit *)pq_top(hq))->score; // maintain min_score
425
- }
517
+ hit.doc = scorer->doc; hit.score = score;
518
+ hq_insert(hq, &hit);
426
519
  }
427
520
  scorer->destroy(scorer);
521
+ weight->destroy(weight);
428
522
 
429
523
  if (hq->count > first_doc) {
430
524
  if ((hq->count - first_doc) < num_docs) {
@@ -446,14 +540,12 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
446
540
  return td_create(total_hits, num_docs, score_docs);
447
541
  }
448
542
 
449
- void sea_search_each(Searcher *self, Query *query, Filter *filter,
450
- void (*fn)(Searcher *self, int doc_num, void *arg), void *arg)
543
+ static void s_search_each_w(Searcher *self, Weight *weight, Filter *filter,
544
+ void (*fn)(Searcher *, int, float, void *), void *arg)
451
545
  {
452
- Weight *weight;
453
546
  Scorer *scorer;
454
547
  BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
455
548
 
456
- weight = q_weight(query, self);
457
549
  scorer = weight->scorer(weight, self->ir);
458
550
  if (!scorer) {
459
551
  if (bits) bv_destroy(bits);
@@ -462,34 +554,53 @@ void sea_search_each(Searcher *self, Query *query, Filter *filter,
462
554
 
463
555
  while (scorer->next(scorer)) {
464
556
  if (bits && !bv_get(bits, scorer->doc)) continue;
465
- fn(self, scorer->doc, arg);
557
+ fn(self, scorer->doc, scorer->score(scorer), arg);
466
558
  }
467
559
  scorer->destroy(scorer);
468
560
  }
469
561
 
470
- Query *sea_rewrite(Searcher *self, Query *original)
562
+ static void s_search_each(Searcher *self, Query *query, Filter *filter,
563
+ void (*fn)(Searcher *, int, float, void *), void *arg)
471
564
  {
565
+ Weight *weight;
566
+ weight = q_weight(query, self);
567
+ s_search_each_w(self, weight, filter, fn, arg);
568
+ weight->destroy(weight);
569
+ }
570
+
571
+ static Query *s_rewrite(Searcher *self, Query *original)
572
+ {
573
+ int q_is_destroyed = false;
472
574
  Query *query = original;
473
575
  Query *rewritten_query = query->rewrite(query, self->ir);
474
- while (query != rewritten_query) {
576
+ while (q_is_destroyed || (query != rewritten_query)) {
475
577
  query = rewritten_query;
476
578
  rewritten_query = query->rewrite(query, self->ir);
579
+ q_is_destroyed = (query->ref_cnt <= 1);
580
+ q_deref(query); /* destroy intermediate queries */
477
581
  }
478
582
  return query;
479
583
  }
480
584
 
481
- Explanation *sea_explain(Searcher *self, Query *query, int doc_num)
585
+ static Explanation *s_explain(Searcher *self, Query *query, int doc_num)
482
586
  {
483
587
  Weight *weight = q_weight(query, self);
484
- return weight->explain(weight, self->ir, doc_num);
588
+ Explanation *e = weight->explain(weight, self->ir, doc_num);
589
+ weight->destroy(weight);
590
+ return e;
591
+ }
592
+
593
+ static Explanation *s_explain_w(Searcher *self, Weight *w, int doc_num)
594
+ {
595
+ return w->explain(w, self->ir, doc_num);
485
596
  }
486
597
 
487
- Similarity *sea_get_similarity(Searcher *self)
598
+ static Similarity *s_get_similarity(Searcher *self)
488
599
  {
489
600
  return self->similarity;
490
601
  }
491
602
 
492
- void sea_close(Searcher *self)
603
+ static void s_close(Searcher *self)
493
604
  {
494
605
  if (self->ir && self->close_ir)
495
606
  ir_close(self->ir);
@@ -502,17 +613,436 @@ Searcher *sea_create(IndexReader *ir)
502
613
  self->ir = ir;
503
614
  self->close_ir = true;
504
615
  self->similarity = sim_create_default();
505
- self->doc_freq = &sea_doc_freq;
506
- self->doc_freqs = &sea_doc_freqs;
507
- self->get_doc = &sea_get_doc;
508
- self->max_doc = &sea_max_doc;
509
- self->create_weight = &sea_create_weight;
510
- self->search = &sea_search;
511
- self->rewrite = &sea_rewrite;
512
- self->explain = &sea_explain;
513
- self->get_similarity = &sea_get_similarity;
514
- self->close = &sea_close;
616
+ self->doc_freq = &s_doc_freq;
617
+ self->doc_freqs = &s_doc_freqs;
618
+ self->get_doc = &s_get_doc;
619
+ self->max_doc = &s_max_doc;
620
+ self->create_weight = &s_create_weight;
621
+ self->search = &s_search;
622
+ self->search_each = &s_search_each;
623
+ self->search_each_w = &s_search_each_w;
624
+ self->rewrite = &s_rewrite;
625
+ self->explain = &s_explain;
626
+ self->explain_w = &s_explain_w;
627
+ self->get_similarity = &s_get_similarity;
628
+ self->close = &s_close;
629
+ return self;
630
+ }
631
+
632
+ /***************************************************************************
633
+ *
634
+ * CachedDFSearcher
635
+ *
636
+ ***************************************************************************/
637
+
638
+ typedef struct CachedDFSearcher {
639
+ HshTable *df_map;
640
+ int max_doc;
641
+ } CachedDFSearcher;
642
+
643
+ static int cdfsea_doc_freq(Searcher *self, Term *term)
644
+ {
645
+ CachedDFSearcher *cdfsea = (CachedDFSearcher *)self->data;
646
+ return (int)h_get(cdfsea->df_map, term);
647
+ }
648
+
649
+ static Document *cdfsea_get_doc(Searcher *self, int doc_num)
650
+ {
651
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
652
+ return NULL;
653
+ }
654
+
655
+ static int cdfsea_max_doc(Searcher *self)
656
+ {
657
+ return ((CachedDFSearcher *)self->data)->max_doc;
658
+ }
659
+
660
+ static Weight *cdfsea_create_weight(Searcher *self, Query *query)
661
+ {
662
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
663
+ return NULL;
664
+ }
665
+
666
+ static TopDocs *cdfsea_search(Searcher *self, Query *query, int first_doc,
667
+ int num_docs, Filter *filter, Sort *sort)
668
+ {
669
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
670
+ return NULL;
671
+ }
672
+
673
+ static void cdfsea_search_each(Searcher *self, Query *query, Filter *filter,
674
+ void (*fn)(Searcher *, int, float, void *), void *arg)
675
+ {
676
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
677
+ }
678
+
679
+ static void cdfsea_search_each_w(Searcher *self, Weight *w, Filter *filter,
680
+ void (*fn)(Searcher *, int, float, void *), void *arg)
681
+ {
682
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
683
+ }
684
+
685
+ static Query *cdfsea_rewrite(Searcher *self, Query *original)
686
+ {
687
+ original->ref_cnt++;
688
+ return original;
689
+ }
690
+
691
+ static Explanation *cdfsea_explain(Searcher *self, Query *query, int doc_num)
692
+ {
693
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
694
+ return NULL;
695
+ }
696
+
697
+ static Explanation *cdfsea_explain_w(Searcher *self, Weight *w, int doc_num)
698
+ {
699
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
700
+ return NULL;
701
+ }
702
+
703
+ static Similarity *cdfsea_get_similarity(Searcher *self)
704
+ {
705
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
706
+ return NULL;
707
+ }
708
+
709
+ static void cdfsea_close(Searcher *self)
710
+ {
711
+ CachedDFSearcher *cdfsea = (CachedDFSearcher *)self->data;
712
+ h_destroy(cdfsea->df_map);
713
+ free(cdfsea);
714
+ free(self);
715
+ }
716
+
717
+ Searcher *cdfsea_create(HshTable *df_map, int max_doc)
718
+ {
719
+ Searcher *self = ALLOC(Searcher);
720
+
721
+ CachedDFSearcher *cdfsea = ALLOC(CachedDFSearcher);
722
+
723
+ cdfsea->df_map = df_map;
724
+ cdfsea->max_doc = max_doc;
725
+ self->data = cdfsea;
726
+
727
+ self->doc_freq = &cdfsea_doc_freq;
728
+ self->doc_freqs = &ss_doc_freqs;
729
+ self->get_doc = &cdfsea_get_doc;
730
+ self->max_doc = &cdfsea_max_doc;
731
+ self->create_weight = &cdfsea_create_weight;
732
+ self->search = &cdfsea_search;
733
+ self->search_each = &cdfsea_search_each;
734
+ self->search_each_w = &cdfsea_search_each_w;
735
+ self->rewrite = &cdfsea_rewrite;
736
+ self->explain = &cdfsea_explain;
737
+ self->explain_w = &cdfsea_explain_w;
738
+ self->get_similarity = &cdfsea_get_similarity;
739
+ self->close = &cdfsea_close;
515
740
  return self;
516
741
  }
517
742
 
743
+ /***************************************************************************
744
+ *
745
+ * MultiSearcher
746
+ *
747
+ ***************************************************************************/
748
+
749
+ static inline int msea_get_searcher_index(Searcher *self, int n)
750
+ {
751
+ MultiSearcher *msea = (MultiSearcher *)self->data;
752
+ int lo = 0; /* search starts array */
753
+ int hi = msea->s_cnt - 1; /* for 1st element < n, return its index */
754
+ int mid, mid_val;
755
+
756
+ while (hi >= lo) {
757
+ mid = (lo + hi) >> 1;
758
+ mid_val = msea->starts[mid];
759
+ if (n < mid_val) {
760
+ hi = mid - 1;
761
+ } else if (n > mid_val) {
762
+ lo = mid + 1;
763
+ } else { /* found a match */
764
+ while (((mid+1) < msea->s_cnt) && (msea->starts[mid+1] == mid_val)) {
765
+ mid++; /* scan to last match */
766
+ }
767
+ return mid;
768
+ }
769
+ }
770
+ return hi;
771
+ }
772
+
773
+ static int msea_doc_freq(Searcher *self, Term *term)
774
+ {
775
+ int i;
776
+ int doc_freq = 0;
777
+ Searcher *s;
778
+ MultiSearcher *msea = (MultiSearcher *)self->data;
779
+ for (i = 0; i < msea->s_cnt; i++) {
780
+ s = msea->searchers[i];
781
+ doc_freq += s->doc_freq(s, term);
782
+ }
783
+
784
+ return doc_freq;
785
+ }
786
+
787
+ static Document *msea_get_doc(Searcher *self, int doc_num)
788
+ {
789
+ MultiSearcher *msea = (MultiSearcher *)self->data;
790
+ int i = msea_get_searcher_index(self, doc_num);
791
+ Searcher *s = msea->searchers[i];
792
+ return s->get_doc(s, doc_num - msea->starts[i]);
793
+ }
794
+
795
+ static int msea_max_doc(Searcher *self)
796
+ {
797
+ return ((MultiSearcher *)self->data)->max_doc;
798
+ }
799
+
800
+ static Weight *msea_create_weight(Searcher *self, Query *query)
801
+ {
802
+ int i, *dfs;
803
+ Searcher *cdfsea;
804
+ Weight *w;
805
+ HshTable *df_map = h_new((hash_ft)&term_hash, (eq_ft)&term_eq,
806
+ (free_ft)NULL, (free_ft)NULL);
807
+ Query *rq = self->rewrite(self, query);
808
+ HashSet *terms = term_set_create();
809
+ rq->extract_terms(rq, terms);
810
+ dfs = self->doc_freqs(self, (Term **)terms->elems, terms->size);
811
+
812
+ for (i = 0; i < terms->size; i++) {
813
+ h_set(df_map, terms->elems[i], (void *)dfs[i]);
814
+ }
815
+ /* don't destroy the individual terms, only the HashSet */
816
+ hs_destroy(terms);
817
+ free(dfs);
818
+
819
+ cdfsea = cdfsea_create(df_map, ((MultiSearcher *)self->data)->max_doc);
820
+
821
+ w = q_weight(rq, cdfsea);
822
+ q_deref(rq);
823
+ cdfsea->close(cdfsea);
824
+
825
+ return w;
826
+ }
827
+
828
+ struct MultiSearchEachArg {
829
+ int start;
830
+ void *arg;
831
+ void (*fn)(Searcher *, int, float, void *);
832
+ };
833
+
834
+ void msea_search_each_i(Searcher *self, int doc_num, float score, void *arg)
835
+ {
836
+ struct MultiSearchEachArg *mse_arg = (struct MultiSearchEachArg *)arg;
837
+
838
+ mse_arg->fn(self, doc_num + mse_arg->start, score, mse_arg->arg);
839
+ }
840
+
841
+ static void msea_search_each_w(Searcher *self, Weight *w, Filter *filter,
842
+ void (*fn)(Searcher *, int, float, void *), void *arg)
843
+ {
844
+ int i;
845
+ struct MultiSearchEachArg mse_arg;
846
+ MultiSearcher *msea = (MultiSearcher *)self->data;
847
+ Searcher *s;
848
+
849
+ mse_arg.fn = fn;
850
+ mse_arg.arg = arg;
851
+ for (i = 0; i < msea->s_cnt; i++) {
852
+ s = msea->searchers[i];
853
+ mse_arg.start = msea->starts[i];
854
+ s->search_each_w(s, w, filter, &msea_search_each_i, &mse_arg);
855
+ }
856
+ }
518
857
 
858
+ static void msea_search_each(Searcher *self, Query *query, Filter *filter,
859
+ void (*fn)(Searcher *, int, float, void *), void *arg)
860
+ {
861
+ Weight *w = q_weight(query, self);
862
+ msea_search_each_w(self, w, filter, fn, arg);
863
+ w->destroy(w);
864
+ }
865
+
866
+ struct MultiSearchArg {
867
+ int total_hits, max_size;
868
+ float min_score;
869
+ PriorityQueue *hq;
870
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
871
+ };
872
+
873
+ void msea_search_i(Searcher *self, int doc_num, float score, void *arg)
874
+ {
875
+ struct MultiSearchArg *ms_arg = (struct MultiSearchArg *)arg;
876
+ Hit hit;
877
+
878
+ ms_arg->total_hits++;
879
+ hit.doc = doc_num;
880
+ hit.score = score;
881
+ ms_arg->hq_insert(ms_arg->hq, &hit);
882
+ }
883
+
884
+ static TopDocs *msea_search(Searcher *self, Query *query, int first_doc,
885
+ int num_docs, Filter *filter, Sort *sort)
886
+ {
887
+ int max_size = first_doc + num_docs;
888
+ int i;
889
+ Weight *weight;
890
+ Hit **score_docs = NULL;
891
+ BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
892
+ Hit *(*hq_pop)(PriorityQueue *pq);
893
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
894
+ void (*hq_destroy)(PriorityQueue *self);
895
+ PriorityQueue *hq;
896
+ struct MultiSearchArg ms_arg;
897
+
898
+
899
+ if (num_docs <= 0)
900
+ RAISE(ARG_ERROR, NUM_DOCS_ARG_ERROR_MSG);
901
+
902
+ if (first_doc < 0)
903
+ RAISE(ARG_ERROR, FIRST_DOC_ARG_ERROR_MSG);
904
+
905
+ weight = q_weight(query, self);
906
+ if (sort) {
907
+ hq = fshq_pq_create(max_size, sort, self->ir);
908
+ hq_pop = &fshq_pq_pop;
909
+ hq_insert = &fshq_pq_insert;
910
+ hq_destroy = &fshq_pq_destroy;
911
+ } else {
912
+ hq = pq_create(max_size, &hit_less_than);
913
+ hq_pop = &hit_pq_pop;
914
+ hq_insert = &hit_pq_insert;
915
+ hq_destroy = &pq_destroy;
916
+ }
917
+
918
+
919
+ ms_arg.hq = hq;
920
+ ms_arg.total_hits = 0;
921
+ ms_arg.max_size = max_size;
922
+ ms_arg.min_score = 0.0;
923
+ ms_arg.hq_insert = hq_insert;
924
+
925
+ msea_search_each_w(self, weight, filter, msea_search_i, &ms_arg);
926
+
927
+ weight->destroy(weight);
928
+
929
+ if (hq->count > first_doc) {
930
+ if ((hq->count - first_doc) < num_docs) {
931
+ num_docs = hq->count - first_doc;
932
+ }
933
+ score_docs = ALLOC_N(Hit *, num_docs);
934
+ for (i = num_docs - 1; i >= 0; i--) {
935
+ score_docs[i] = hq_pop(hq);
936
+ //hit = score_docs[i] = pq_pop(hq);
937
+ //printf("hit = %d-->%f\n", hit->doc, hit->score);
938
+ }
939
+ } else {
940
+ num_docs = 0;
941
+ }
942
+ pq_clear(hq);
943
+ hq_destroy(hq);
944
+
945
+ if (bits) bv_destroy(bits);
946
+ return td_create(ms_arg.total_hits, num_docs, score_docs);
947
+ }
948
+
949
+ static Query *msea_rewrite(Searcher *self, Query *original)
950
+ {
951
+ int i;
952
+ Searcher *s;
953
+ MultiSearcher *msea = (MultiSearcher *)self->data;
954
+ Query **queries = ALLOC_N(Query *, msea->s_cnt), *rewritten;
955
+
956
+ for (i = 0; i < msea->s_cnt; i++) {
957
+ s = msea->searchers[i];
958
+ queries[i] = s->rewrite(s, original);
959
+ }
960
+ rewritten = q_combine(queries, msea->s_cnt);
961
+
962
+ for (i = 0; i < msea->s_cnt; i++) {
963
+ q_deref(queries[i]);
964
+ }
965
+ free(queries);
966
+ return rewritten;
967
+ }
968
+
969
+ static Explanation *msea_explain(Searcher *self, Query *query, int doc_num)
970
+ {
971
+ MultiSearcher *msea = (MultiSearcher *)self->data;
972
+ int i = msea_get_searcher_index(self, doc_num);
973
+ Weight *w = q_weight(query, self);
974
+ Searcher *s = msea->searchers[i];
975
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
976
+ w->destroy(w);
977
+ return e;
978
+ }
979
+
980
+ static Explanation *msea_explain_w(Searcher *self, Weight *w, int doc_num)
981
+ {
982
+ MultiSearcher *msea = (MultiSearcher *)self->data;
983
+ int i = msea_get_searcher_index(self, doc_num);
984
+ Searcher *s = msea->searchers[i];
985
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
986
+ return e;
987
+ }
988
+
989
+ static Similarity *msea_get_similarity(Searcher *self)
990
+ {
991
+ return self->similarity;
992
+ }
993
+
994
+ static void msea_close(Searcher *self)
995
+ {
996
+ int i;
997
+ Searcher *s;
998
+ MultiSearcher *msea = (MultiSearcher *)self->data;
999
+ if (msea->close_subs) {
1000
+ for (i = 0; i < msea->s_cnt; i++) {
1001
+ s = msea->searchers[i];
1002
+ s->close(s);
1003
+ }
1004
+ free(msea->searchers);
1005
+ }
1006
+ free(msea->starts);
1007
+ free(msea);
1008
+ free(self);
1009
+ }
1010
+
1011
+ Searcher *msea_create(Searcher **searchers, int s_cnt, bool close_subs)
1012
+ {
1013
+ int i, max_doc = 0, *starts;
1014
+ Searcher *self = ALLOC(Searcher);
1015
+
1016
+ MultiSearcher *msea = ALLOC(MultiSearcher);
1017
+
1018
+ starts = ALLOC_N(int, s_cnt + 1);
1019
+ for (i = 0; i < s_cnt; i++) {
1020
+ starts[i] = max_doc;
1021
+ max_doc += searchers[i]->max_doc(searchers[i]);
1022
+ }
1023
+ starts[i] = max_doc;
1024
+
1025
+ msea->s_cnt = s_cnt;
1026
+ msea->searchers = searchers;
1027
+ msea->starts = starts;
1028
+ msea->max_doc = max_doc;
1029
+ msea->close_subs = close_subs;
1030
+ self->data = msea;
1031
+
1032
+ self->ir = (IndexReader *)NULL;
1033
+ self->similarity = sim_create_default();
1034
+ self->doc_freq = &msea_doc_freq;
1035
+ self->doc_freqs = &ss_doc_freqs;
1036
+ self->get_doc = &msea_get_doc;
1037
+ self->max_doc = &msea_max_doc;
1038
+ self->create_weight = &msea_create_weight;
1039
+ self->search = &msea_search;
1040
+ self->search_each = &msea_search_each;
1041
+ self->search_each_w = &msea_search_each_w;
1042
+ self->rewrite = &msea_rewrite;
1043
+ self->explain = &msea_explain;
1044
+ self->explain_w = &msea_explain_w;
1045
+ self->get_similarity = &msea_get_similarity;
1046
+ self->close = &msea_close;
1047
+ return self;
1048
+ }