ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/r_store.c CHANGED
@@ -6,6 +6,10 @@ VALUE cDirectory;
6
6
  VALUE cRAMDirectory;
7
7
  VALUE cFSDirectory;
8
8
 
9
+
10
+ static ID id_mkdir_p;
11
+ static ID id_is_directory;
12
+
9
13
  /****************************************************************************
10
14
  *
11
15
  * Lock Methods
@@ -87,11 +91,10 @@ frt_lock_release(VALUE self)
87
91
  ****************************************************************************/
88
92
 
89
93
  void
90
- frt_dir_free(void *p)
94
+ frt_dir_free(Store *store)
91
95
  {
92
- Store *store = (Store *)p;
93
- object_del(p);
94
- store->close(store);
96
+ object_del(store);
97
+ store_deref(store);
95
98
  }
96
99
 
97
100
  #define GET_STORE Store *store; Data_Get_Struct(self, Store, store)
@@ -99,10 +102,11 @@ static VALUE
99
102
  frt_dir_close(VALUE self)
100
103
  {
101
104
  /*
105
+ * No need to do anything here. Leave it to the garbage collector
102
106
  GET_STORE;
103
107
  Frt_Unwrap_Struct(self);
104
108
  object_del(store);
105
- store->close(store);
109
+ store_deref(store);
106
110
  */
107
111
  return Qnil;
108
112
  }
@@ -212,11 +216,23 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
212
216
  Store *store;
213
217
  bool create = RTEST(rcreate);
214
218
  rpath = rb_obj_as_string(rpath);
219
+ if (create) {
220
+ VALUE mFileUtils;
221
+ rb_require("fileutils");
222
+ mFileUtils = rb_define_module("FileUtils");
223
+ rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
224
+ }
225
+ if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
226
+ rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
227
+ "create one.", RSTRING(rpath)->ptr);
228
+ }
215
229
  store = open_fs_store(RSTRING(rpath)->ptr);
216
230
  if (create) store->clear_all(store);
217
231
  if ((self = object_get(store)) == Qnil) {
218
232
  self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
219
233
  object_add(store, self);
234
+ } else {
235
+ store_deref(store);
220
236
  }
221
237
  return self;
222
238
  }
@@ -227,19 +243,12 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
227
243
  *
228
244
  ****************************************************************************/
229
245
 
230
- #define DIR_METHODS(dir)\
231
- rb_define_method(dir, "close", frt_dir_close, 0);\
232
- rb_define_method(dir, "exists?", frt_dir_exists, 1);\
233
- rb_define_method(dir, "touch", frt_dir_touch, 1);\
234
- rb_define_method(dir, "delete", frt_dir_delete, 1);\
235
- rb_define_method(dir, "file_count", frt_dir_file_count, 0);\
236
- rb_define_method(dir, "refresh", frt_dir_refresh, 0);\
237
- rb_define_method(dir, "rename", frt_dir_rename, 2);\
238
- rb_define_method(dir, "make_lock", frt_dir_make_lock, 1);
239
-
240
246
  void
241
247
  Init_dir(void)
242
248
  {
249
+ id_mkdir_p = rb_intern("mkdir_p");
250
+ id_is_directory = rb_intern("directory?");
251
+
243
252
  cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
244
253
  rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
245
254
  rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
@@ -248,16 +257,22 @@ Init_dir(void)
248
257
 
249
258
  cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
250
259
  rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
260
+ rb_define_method(cDirectory, "close", frt_dir_close, 0);\
261
+ rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);\
262
+ rb_define_method(cDirectory, "touch", frt_dir_touch, 1);\
263
+ rb_define_method(cDirectory, "delete", frt_dir_delete, 1);\
264
+ rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);\
265
+ rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);\
266
+ rb_define_method(cDirectory, "rename", frt_dir_rename, 2);\
267
+ rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
251
268
 
252
269
  /* RAMDirectory */
253
270
  cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
254
271
  rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
255
272
  rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
256
- DIR_METHODS(cRAMDirectory);
257
273
 
258
274
  /* FSDirectory */
259
275
  cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
260
276
  rb_define_alloc_func(cFSDirectory, frt_data_alloc);
261
277
  rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, 2);
262
- DIR_METHODS(cFSDirectory);
263
278
  }
data/ext/ram_store.c CHANGED
@@ -4,12 +4,14 @@
4
4
  static char * const RENAME_ERROR_MSG = "tried to rename a file that doesn't exist";
5
5
  static char * const MISSING_RAMFILE_ERROR_MSG ="Couldn't open the ram file to read";
6
6
 
7
+ extern void store_destroy(Store *store);
8
+
7
9
  typedef struct RamFile {
8
10
  char *name;
9
11
  uchar **buffers;
10
12
  int bufcnt;
11
13
  int len;
12
- int refcnt;
14
+ int ref_cnt;
13
15
  bool alive;
14
16
  } RamFile;
15
17
 
@@ -21,7 +23,7 @@ RamFile *rf_create(const char *name)
21
23
  rf->name = estrdup(name);
22
24
  rf->len = 0;
23
25
  rf->bufcnt = 1;
24
- rf->refcnt = 0;
26
+ rf->ref_cnt = 0;
25
27
  rf->alive = true;
26
28
  return rf;
27
29
  }
@@ -38,7 +40,7 @@ void rf_close(void *p)
38
40
  {
39
41
  int i;
40
42
  RamFile *rf = (RamFile *)p;
41
- if (rf->refcnt > 0 || rf->alive) return;
43
+ if (rf->ref_cnt > 0 || rf->alive) return;
42
44
  free(rf->name);
43
45
  for (i = 0; i < rf->bufcnt; i++) {
44
46
  free(rf->buffers[i]);
@@ -76,17 +78,21 @@ int ram_remove(Store *store, char *filename)
76
78
  int ram_rename(Store *store, char *from, char *to)
77
79
  {
78
80
  RamFile *rf = (RamFile *)h_rem(store->dir.ht, from, false);
79
- if (rf == NULL)
81
+ RamFile *tmp;
82
+
83
+ if (rf == NULL) {
80
84
  RAISE(IO_ERROR, RENAME_ERROR_MSG);
85
+ }
81
86
 
82
87
  free(rf->name);
83
88
 
84
89
  rf->name = estrdup(to);
85
90
 
86
- // clean up the file we are overwriting
87
- RamFile *tmp = (RamFile *)h_get(store->dir.ht, to);
88
- if (tmp != NULL)
91
+ /* clean up the file we are overwriting */
92
+ tmp = (RamFile *)h_get(store->dir.ht, to);
93
+ if (tmp != NULL) {
89
94
  tmp->alive = false;
95
+ }
90
96
 
91
97
  h_set(store->dir.ht, rf->name, rf);
92
98
  return true;
@@ -112,7 +118,7 @@ void ram_each(Store *store, void (*func)(char *fname, void *arg), void *arg)
112
118
  }
113
119
  }
114
120
 
115
- void ram_close(Store *store)
121
+ void ram_close_i(Store *store)
116
122
  {
117
123
  HshTable *ht = store->dir.ht;
118
124
  RamFile *rf;
@@ -185,6 +191,7 @@ int ramo_length(OutStream *os)
185
191
 
186
192
  void ramo_flush_internal(OutStream *os, uchar *src, int len)
187
193
  {
194
+ uchar *buffer;
188
195
  RamFile *rf = (RamFile *)os->file;
189
196
  int buffer_number, buffer_offset, bytes_in_buffer, bytes_to_copy;
190
197
  int src_offset;
@@ -197,7 +204,7 @@ void ramo_flush_internal(OutStream *os, uchar *src, int len)
197
204
 
198
205
  rf_extend_if_necessary(rf, buffer_number);
199
206
 
200
- uchar *buffer = rf->buffers[buffer_number];
207
+ buffer = rf->buffers[buffer_number];
201
208
  memcpy(buffer + buffer_offset, src, bytes_to_copy);
202
209
 
203
210
  if (bytes_to_copy < len) {
@@ -230,7 +237,7 @@ void ramo_reset(OutStream *os)
230
237
  void ramo_close_internal(OutStream *os)
231
238
  {
232
239
  RamFile *rf = (RamFile *)os->file;
233
- rf->refcnt--;
240
+ rf->ref_cnt--;
234
241
  rf_close(rf);
235
242
  }
236
243
 
@@ -238,9 +245,12 @@ void ramo_write_to(OutStream *os, OutStream *other_o)
238
245
  {
239
246
  int i, len;
240
247
  RamFile *rf = (RamFile *)os->file;
248
+ int last_buffer_number;
249
+ int last_buffer_offset;
250
+
241
251
  os_flush(os);
242
- int last_buffer_number = (int)(rf->len / BUFFER_SIZE);
243
- int last_buffer_offset = rf->len % BUFFER_SIZE;
252
+ last_buffer_number = (int)(rf->len / BUFFER_SIZE);
253
+ last_buffer_offset = rf->len % BUFFER_SIZE;
244
254
  for (i = 0; i <= last_buffer_number; i++) {
245
255
  len = (i == last_buffer_number ? last_buffer_offset : BUFFER_SIZE);
246
256
  os_write_bytes(other_o, rf->buffers[i], len);
@@ -250,8 +260,9 @@ void ramo_write_to(OutStream *os, OutStream *other_o)
250
260
  OutStream *ram_create_buffer()
251
261
  {
252
262
  RamFile *rf = rf_create("");
253
- rf->alive = false;
254
263
  OutStream *os = os_create();
264
+
265
+ rf->alive = false;
255
266
  os->file = rf;
256
267
  os->pointer = 0;
257
268
  os->flush_internal = &ramo_flush_internal;
@@ -269,12 +280,13 @@ void ram_destroy_buffer(OutStream *os)
269
280
  OutStream *ram_create_output(Store *store, const char *filename)
270
281
  {
271
282
  RamFile *rf = (RamFile *)h_get(store->dir.ht, filename);
283
+ OutStream *os = os_create();
284
+
272
285
  if (rf == NULL) {
273
286
  rf = rf_create(filename);
274
287
  h_set(store->dir.ht, rf->name, rf);
275
288
  }
276
- rf->refcnt++;
277
- OutStream *os = os_create();
289
+ rf->ref_cnt++;
278
290
  os->pointer = 0;
279
291
  os->file = rf;
280
292
  os->flush_internal = &ramo_flush_internal;
@@ -325,23 +337,24 @@ void rami_seek_internal(InStream *is, int pos)
325
337
  void rami_close_internal(InStream *is)
326
338
  {
327
339
  RamFile *rf = (RamFile *)is->file;
328
- rf->refcnt--;
340
+ rf->ref_cnt--;
329
341
  rf_close(rf);
330
342
  }
331
343
 
332
344
  void rami_clone_internal(InStream *is, InStream *new_index_i)
333
345
  {
334
- ((RamFile *)is->file)->refcnt++;
346
+ ((RamFile *)is->file)->ref_cnt++;
335
347
  }
336
348
 
337
349
  InStream *ram_open_input(Store *store, const char *filename)
338
350
  {
339
351
  RamFile *rf = (RamFile *)h_get(store->dir.ht, filename);
352
+ InStream *is = is_create();
353
+
340
354
  if (rf == NULL) {
341
355
  RAISE(IO_ERROR, MISSING_RAMFILE_ERROR_MSG);
342
356
  }
343
- rf->refcnt++;
344
- InStream *is = is_create();
357
+ rf->ref_cnt++;
345
358
  is->file = rf;
346
359
  is->d.pointer = 0;
347
360
  is->is_clone = false;
@@ -404,7 +417,6 @@ Store *open_ram_store()
404
417
  new_store->remove = &ram_remove;
405
418
  new_store->rename = &ram_rename;
406
419
  new_store->count = &ram_count;
407
- new_store->close = &ram_close;
408
420
  new_store->clear = &ram_clear;
409
421
  new_store->clear_all = &ram_clear_all;
410
422
  new_store->clear_locks = &ram_clear_locks;
@@ -414,6 +426,7 @@ Store *open_ram_store()
414
426
  new_store->open_input = &ram_open_input;
415
427
  new_store->open_lock = &ram_open_lock;
416
428
  new_store->close_lock = &ram_close_lock;
429
+ new_store->close_i = &ram_close_i;
417
430
  return new_store;
418
431
  }
419
432
 
@@ -427,11 +440,14 @@ static void copy_files(char *fname, void *arg)
427
440
  OutStream *os = cfa->to_store->create_output(cfa->to_store, fname);
428
441
  InStream *is = cfa->from_store->open_input(cfa->from_store, fname);
429
442
  int len = is_length(is);
430
- uchar buffer[len+1];
443
+ uchar *buffer = ALLOC_N(uchar, len+1);
444
+
431
445
  is_read_bytes(is, buffer, 0, len);
432
446
  os_write_bytes(os, buffer, len);
447
+
433
448
  is_close(is);
434
449
  os_close(os);
450
+ free(buffer);
435
451
  }
436
452
 
437
453
  Store *open_ram_store_and_copy(Store *from_store, bool close_dir)
@@ -444,7 +460,7 @@ Store *open_ram_store_and_copy(Store *from_store, bool close_dir)
444
460
  from_store->each(from_store, &copy_files, &cfa);
445
461
 
446
462
  if (close_dir)
447
- from_store->close(from_store);
463
+ store_deref(from_store);
448
464
 
449
465
  return store;
450
466
  }
data/ext/search.c CHANGED
@@ -135,30 +135,34 @@ Hit *hit_pq_pop(PriorityQueue *pq)
135
135
 
136
136
  inline void hit_pq_up(PriorityQueue *pq)
137
137
  {
138
- int i,j;
139
- i = pq->count;
140
- j = i >> 1;
141
138
  Hit **heap = (Hit **)pq->heap;
142
- Hit *node = heap[i];
139
+ Hit *node;
140
+ int i = pq->count;
141
+ int j = i >> 1;
142
+ node = heap[i];
143
143
 
144
- while ((j > 0) && hit_lt(node, heap[j])) {
145
- heap[i] = heap[j];
146
- i = j;
147
- j = j >> 1;
148
- }
144
+ while ((j > 0) && hit_lt(node, heap[j])) {
145
+ heap[i] = heap[j];
146
+ i = j;
147
+ j = j >> 1;
148
+ }
149
149
  heap[i] = node;
150
150
  }
151
151
 
152
-
153
- void hit_pq_push(PriorityQueue *pq, void *elem)
152
+ void hit_pq_insert(PriorityQueue *pq, Hit *hit)
154
153
  {
155
- pq->count++;
156
- pq->heap[pq->count] = elem;
157
- hit_pq_up(pq);
154
+ if (pq->count < pq->size) {
155
+ Hit *new_hit = ALLOC(Hit);
156
+ memcpy(new_hit, hit, sizeof(Hit));
157
+ pq->count++;
158
+ pq->heap[pq->count] = new_hit;
159
+ hit_pq_up(pq);
160
+ } else if (pq->count > 0 && hit_lt((Hit *)pq->heap[1], hit)) {
161
+ memcpy(pq->heap[1], hit, sizeof(Hit));
162
+ hit_pq_down(pq);
163
+ }
158
164
  }
159
165
 
160
-
161
-
162
166
  /***************************************************************************
163
167
  *
164
168
  * TopDocs
@@ -174,9 +178,8 @@ TopDocs *td_create(int total_hits, int size, Hit **hits)
174
178
  return td;
175
179
  }
176
180
 
177
- void td_destroy(void *p)
181
+ void td_destroy(TopDocs *td)
178
182
  {
179
- TopDocs *td = (TopDocs *)p;
180
183
  int i;
181
184
  for (i = 0; i < td->size; i++) {
182
185
  free(td->hits[i]);
@@ -226,54 +229,78 @@ void w_normalize(Weight *self, float normalization_factor)
226
229
  self->value = self->qweight * self->idf; // idf for document
227
230
  }
228
231
 
232
+ void w_destroy(Weight *self)
233
+ {
234
+ q_deref(self->query);
235
+ free(self);
236
+ }
237
+
238
+ Weight *w_create(Query *query)
239
+ {
240
+ Weight *self = ALLOC_AND_ZERO_N(Weight, 1);
241
+ ref(query);
242
+ self->query = query;
243
+
244
+ self->get_query = &w_get_query;
245
+ self->get_value = &w_get_value;
246
+ self->normalize = &w_normalize;
247
+ self->destroy = &w_destroy;
248
+ return self;
249
+ }
250
+
229
251
  /***************************************************************************
230
252
  *
231
253
  * Query
232
254
  *
233
255
  ***************************************************************************/
234
256
 
235
- Similarity *q_get_similarity(Query *self, Searcher *searcher)
257
+ Similarity *q_get_similarity_i(Query *self, Searcher *searcher)
236
258
  {
237
259
  return searcher->get_similarity(searcher);
238
260
  }
239
261
 
240
262
  Query *q_rewrite(Query *self, IndexReader *ir)
241
263
  {
264
+ self->ref_cnt++;
242
265
  return self;
243
266
  }
244
267
 
245
268
  Weight *q_weight(Query *self, Searcher *searcher)
246
269
  {
247
- if (self->weight) {
248
- self->weight->destroy(self->weight);
249
- }
250
270
  Query *query = searcher->rewrite(searcher, self);
251
- Weight *weight = query->create_weight(query, searcher);
271
+ Weight *weight = query->create_weight_i(query, searcher);
252
272
  float sum = weight->sum_of_squared_weights(weight);
253
273
  Similarity *sim = query->get_similarity(query, searcher);
254
274
  float norm = sim_query_norm(sim, sum);
275
+ q_deref(query);
255
276
 
256
277
  weight->normalize(weight, norm);
257
278
  return self->weight = weight;
258
279
  }
259
280
 
260
- void q_destroy(Query *self)
281
+ Weight *q_create_weight_unsup(Query *self, Searcher *searcher)
282
+ {
283
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
284
+ return NULL;
285
+ }
286
+
287
+ void q_destroy_i(Query *self)
261
288
  {
262
- if (self->rewritten) {
263
- self->rewritten->destroy(self->rewritten);
264
- self->rewritten = NULL;
265
- }
266
- if (self->weight) {
267
- self->weight->destroy(self->weight);
268
- }
269
289
  free(self);
270
290
  }
271
291
 
272
- void q_extract_terms(Query *self, Array *terms)
292
+ void q_extract_terms(Query *self, HashSet *terms)
273
293
  {
274
294
  /* do nothing by default */
275
295
  }
276
296
 
297
+ void q_deref(Query *self)
298
+ {
299
+ if (--self->ref_cnt == 0) {
300
+ self->destroy_i(self);
301
+ }
302
+ }
303
+
277
304
  Query *q_create()
278
305
  {
279
306
  Query *self = ALLOC(Query);
@@ -281,30 +308,92 @@ Query *q_create()
281
308
  self->destroy_all = true;
282
309
  self->boost = 1.0;
283
310
  self->rewrite = &q_rewrite;
284
- self->get_similarity = &q_get_similarity;
311
+ self->get_similarity = &q_get_similarity_i;
285
312
  self->extract_terms = &q_extract_terms;
286
313
  self->weight = NULL;
287
- self->rewritten = NULL;
314
+ self->ref_cnt = 1;
288
315
  return self;
289
316
  }
290
317
 
318
+ uint q_hash(Query *self)
319
+ {
320
+ return (self->hash(self) << 4) | self->type;
321
+ }
322
+
323
+ int q_eq(Query *self, Query *o)
324
+ {
325
+ return (self == o) || ((self->type == o->type) &&
326
+ (self->boost == o->boost) &&
327
+ self->eq(self, o));
328
+ }
329
+
330
+ Query *q_combine(Query **queries, int q_cnt)
331
+ {
332
+ int i;
333
+ Query *q, *ret_q;
334
+ HashSet *uniques =
335
+ hs_create((hash_ft)&q_hash, (eq_ft)&q_eq, NULL);
336
+
337
+ for (i = 0; i < q_cnt; i++) {
338
+ q = queries[i];
339
+ if (q->type == BOOLEAN_QUERY) {
340
+ int j;
341
+ bool splittable = true;
342
+ BooleanQuery *bq = (BooleanQuery *)q->data;
343
+ if (bq->coord_disabled == false) {
344
+ splittable = false;
345
+ } else {
346
+ for (j = 0; j < bq->clause_cnt; j++) {
347
+ if (bq->clauses[j]->occur != BC_SHOULD) {
348
+ splittable = false;
349
+ break;
350
+ }
351
+ }
352
+ }
353
+ if (splittable) {
354
+ for (j = 0; j < bq->clause_cnt; j++) {
355
+ q = bq->clauses[j]->query;
356
+ hs_add(uniques, q);
357
+ }
358
+ } else {
359
+ hs_add(uniques, q);
360
+ }
361
+ } else {
362
+ hs_add(uniques, q);
363
+ }
364
+ }
365
+ if (uniques->size == 1) {
366
+ ret_q = (Query *)uniques->elems[0];
367
+ ref(ret_q);
368
+ } else {
369
+ ret_q = bq_create(true);
370
+ for (i = 0; i < uniques->size; i++) {
371
+ q = (Query *)uniques->elems[i];
372
+ ref(q);
373
+ bq_add_query(ret_q, q, BC_SHOULD);
374
+ }
375
+ }
376
+ hs_destroy(uniques);
377
+
378
+ return ret_q;
379
+ }
380
+
291
381
  /***************************************************************************
292
382
  *
293
383
  * Scorer
294
384
  *
295
385
  ***************************************************************************/
296
386
 
297
- void scorer_destroy(void *p)
387
+ void scorer_destroy_i(Scorer *self)
298
388
  {
299
- Scorer *scorer = (Scorer *)p;
300
- free(scorer->data);
301
- free(scorer);
389
+ free(self->data);
390
+ free(self);
302
391
  }
303
392
 
304
393
  Scorer *scorer_create(Similarity *similarity)
305
394
  {
306
395
  Scorer *self = ALLOC(Scorer);
307
- self->destroy = &scorer_destroy;
396
+ self->destroy = &scorer_destroy_i;
308
397
  self->data = NULL;
309
398
  self->similarity = similarity;
310
399
  return self;
@@ -326,43 +415,58 @@ int scorer_doc_cmp(const void *p1, const void *p2)
326
415
  {
327
416
  return (*(Scorer **)p1)->doc - (*(Scorer **)p2)->doc;
328
417
  }
418
+
329
419
  /***************************************************************************
330
420
  *
331
421
  * Searcher
332
422
  *
333
423
  ***************************************************************************/
334
424
 
335
- int sea_doc_freq(Searcher *self, Term *term)
425
+ static int s_doc_freq(Searcher *self, Term *term)
336
426
  {
337
427
  return self->ir->doc_freq(self->ir, term);
338
428
  }
339
429
 
340
- int *sea_doc_freqs(Searcher *self, Term **terms, int tcnt)
430
+ static int *s_doc_freqs(Searcher *self, Term **terms, int tcnt)
341
431
  {
342
- int *freqs = ALLOC_N(int, tcnt);
343
432
  int i;
433
+ int *freqs = ALLOC_N(int, tcnt);
434
+
344
435
  for (i = 0; i < tcnt; i++) {
345
436
  freqs[i] = self->ir->doc_freq(self->ir, terms[i]);
346
437
  }
347
438
  return freqs;
348
439
  }
349
440
 
350
- Document *sea_get_doc(Searcher *self, int doc_num)
441
+ static int *ss_doc_freqs(Searcher *self, Term **terms, int tcnt)
442
+ {
443
+ int i;
444
+ int *freqs = ALLOC_N(int, tcnt);
445
+
446
+ for (i = 0; i < tcnt; i++) {
447
+ freqs[i] = self->doc_freq(self, terms[i]);
448
+ }
449
+
450
+ return freqs;
451
+ }
452
+
453
+
454
+ static Document *s_get_doc(Searcher *self, int doc_num)
351
455
  {
352
456
  return self->ir->get_doc(self->ir, doc_num);
353
457
  }
354
458
 
355
- int sea_max_doc(Searcher *self)
459
+ static int s_max_doc(Searcher *self)
356
460
  {
357
461
  return self->ir->max_doc(self->ir);
358
462
  }
359
463
 
360
- Weight *sea_create_weight(Searcher *self, Query *query)
464
+ static Weight *s_create_weight(Searcher *self, Query *query)
361
465
  {
362
466
  return q_weight(query, self);
363
467
  }
364
468
 
365
- TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
469
+ static TopDocs *s_search(Searcher *self, Query *query, int first_doc,
366
470
  int num_docs, Filter *filter, Sort *sort)
367
471
  {
368
472
  int max_size = first_doc + num_docs;
@@ -370,14 +474,13 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
370
474
  Weight *weight;
371
475
  Scorer *scorer;
372
476
  Hit **score_docs = NULL;
373
- Hit *hit;
477
+ Hit hit;
374
478
  int total_hits = 0;
375
- float min_score = 0.0, score;
479
+ float score;
376
480
  BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
377
481
  Hit *(*hq_pop)(PriorityQueue *pq);
378
- void (*hq_down)(PriorityQueue *pq);
379
- void (*hq_push)(PriorityQueue *pq, void *elem);
380
- void (*hq_destroy)(void *p);
482
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
483
+ void (*hq_destroy)(PriorityQueue *self);
381
484
  PriorityQueue *hq;
382
485
 
383
486
 
@@ -391,20 +494,19 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
391
494
  scorer = weight->scorer(weight, self->ir);
392
495
  if (!scorer) {
393
496
  if (bits) bv_destroy(bits);
497
+ weight->destroy(weight);
394
498
  return td_create(0, 0, NULL);
395
499
  }
396
500
 
397
501
  if (sort) {
398
502
  hq = fshq_pq_create(max_size, sort, self->ir);
399
503
  hq_pop = &fshq_pq_pop;
400
- hq_down = &fshq_pq_down;
401
- hq_push = &fshq_pq_push;
504
+ hq_insert = &fshq_pq_insert;
402
505
  hq_destroy = &fshq_pq_destroy;
403
506
  } else {
404
507
  hq = pq_create(max_size, &hit_less_than);
405
508
  hq_pop = &hit_pq_pop;
406
- hq_down = &hit_pq_down;
407
- hq_push = &hit_pq_push;
509
+ hq_insert = &hit_pq_insert;
408
510
  hq_destroy = &pq_destroy;
409
511
  }
410
512
 
@@ -412,19 +514,11 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
412
514
  if (bits && !bv_get(bits, scorer->doc)) continue;
413
515
  total_hits++;
414
516
  score = scorer->score(scorer);
415
- if (hq->count < max_size) {
416
- hit = ALLOC(Hit);
417
- hit->doc = scorer->doc; hit->score = score;
418
- hq_push(hq, hit);
419
- min_score = ((Hit *)pq_top(hq))->score; // maintain min_score
420
- } else if (score > min_score) {
421
- hit = pq_top(hq);
422
- hit->doc = scorer->doc; hit->score = score;
423
- hq_down(hq);
424
- min_score = ((Hit *)pq_top(hq))->score; // maintain min_score
425
- }
517
+ hit.doc = scorer->doc; hit.score = score;
518
+ hq_insert(hq, &hit);
426
519
  }
427
520
  scorer->destroy(scorer);
521
+ weight->destroy(weight);
428
522
 
429
523
  if (hq->count > first_doc) {
430
524
  if ((hq->count - first_doc) < num_docs) {
@@ -446,14 +540,12 @@ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
446
540
  return td_create(total_hits, num_docs, score_docs);
447
541
  }
448
542
 
449
- void sea_search_each(Searcher *self, Query *query, Filter *filter,
450
- void (*fn)(Searcher *self, int doc_num, void *arg), void *arg)
543
+ static void s_search_each_w(Searcher *self, Weight *weight, Filter *filter,
544
+ void (*fn)(Searcher *, int, float, void *), void *arg)
451
545
  {
452
- Weight *weight;
453
546
  Scorer *scorer;
454
547
  BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
455
548
 
456
- weight = q_weight(query, self);
457
549
  scorer = weight->scorer(weight, self->ir);
458
550
  if (!scorer) {
459
551
  if (bits) bv_destroy(bits);
@@ -462,34 +554,53 @@ void sea_search_each(Searcher *self, Query *query, Filter *filter,
462
554
 
463
555
  while (scorer->next(scorer)) {
464
556
  if (bits && !bv_get(bits, scorer->doc)) continue;
465
- fn(self, scorer->doc, arg);
557
+ fn(self, scorer->doc, scorer->score(scorer), arg);
466
558
  }
467
559
  scorer->destroy(scorer);
468
560
  }
469
561
 
470
- Query *sea_rewrite(Searcher *self, Query *original)
562
+ static void s_search_each(Searcher *self, Query *query, Filter *filter,
563
+ void (*fn)(Searcher *, int, float, void *), void *arg)
471
564
  {
565
+ Weight *weight;
566
+ weight = q_weight(query, self);
567
+ s_search_each_w(self, weight, filter, fn, arg);
568
+ weight->destroy(weight);
569
+ }
570
+
571
+ static Query *s_rewrite(Searcher *self, Query *original)
572
+ {
573
+ int q_is_destroyed = false;
472
574
  Query *query = original;
473
575
  Query *rewritten_query = query->rewrite(query, self->ir);
474
- while (query != rewritten_query) {
576
+ while (q_is_destroyed || (query != rewritten_query)) {
475
577
  query = rewritten_query;
476
578
  rewritten_query = query->rewrite(query, self->ir);
579
+ q_is_destroyed = (query->ref_cnt <= 1);
580
+ q_deref(query); /* destroy intermediate queries */
477
581
  }
478
582
  return query;
479
583
  }
480
584
 
481
- Explanation *sea_explain(Searcher *self, Query *query, int doc_num)
585
+ static Explanation *s_explain(Searcher *self, Query *query, int doc_num)
482
586
  {
483
587
  Weight *weight = q_weight(query, self);
484
- return weight->explain(weight, self->ir, doc_num);
588
+ Explanation *e = weight->explain(weight, self->ir, doc_num);
589
+ weight->destroy(weight);
590
+ return e;
591
+ }
592
+
593
+ static Explanation *s_explain_w(Searcher *self, Weight *w, int doc_num)
594
+ {
595
+ return w->explain(w, self->ir, doc_num);
485
596
  }
486
597
 
487
- Similarity *sea_get_similarity(Searcher *self)
598
+ static Similarity *s_get_similarity(Searcher *self)
488
599
  {
489
600
  return self->similarity;
490
601
  }
491
602
 
492
- void sea_close(Searcher *self)
603
+ static void s_close(Searcher *self)
493
604
  {
494
605
  if (self->ir && self->close_ir)
495
606
  ir_close(self->ir);
@@ -502,17 +613,436 @@ Searcher *sea_create(IndexReader *ir)
502
613
  self->ir = ir;
503
614
  self->close_ir = true;
504
615
  self->similarity = sim_create_default();
505
- self->doc_freq = &sea_doc_freq;
506
- self->doc_freqs = &sea_doc_freqs;
507
- self->get_doc = &sea_get_doc;
508
- self->max_doc = &sea_max_doc;
509
- self->create_weight = &sea_create_weight;
510
- self->search = &sea_search;
511
- self->rewrite = &sea_rewrite;
512
- self->explain = &sea_explain;
513
- self->get_similarity = &sea_get_similarity;
514
- self->close = &sea_close;
616
+ self->doc_freq = &s_doc_freq;
617
+ self->doc_freqs = &s_doc_freqs;
618
+ self->get_doc = &s_get_doc;
619
+ self->max_doc = &s_max_doc;
620
+ self->create_weight = &s_create_weight;
621
+ self->search = &s_search;
622
+ self->search_each = &s_search_each;
623
+ self->search_each_w = &s_search_each_w;
624
+ self->rewrite = &s_rewrite;
625
+ self->explain = &s_explain;
626
+ self->explain_w = &s_explain_w;
627
+ self->get_similarity = &s_get_similarity;
628
+ self->close = &s_close;
629
+ return self;
630
+ }
631
+
632
+ /***************************************************************************
633
+ *
634
+ * CachedDFSearcher
635
+ *
636
+ ***************************************************************************/
637
+
638
/*
 * Private data hung off Searcher.data by cdfsea_create().
 * Only document frequencies and max_doc are answered from it; the other
 * cdfsea_* operations in this file raise UNSUPPORTED_ERROR.
 */
+ typedef struct CachedDFSearcher {
639
/* Term -> document frequency; read by cdfsea_doc_freq() via h_get() */
+ HshTable *df_map;
640
/* value returned by cdfsea_max_doc() */
+ int max_doc;
641
+ } CachedDFSearcher;
642
+
643
/*
 * cdfsea_doc_freq: return the cached document frequency for `term`.
 *
 * The count is stored directly in the hash value slot (msea_create_weight
 * inserts it as `(void *)dfs[i]`), so the h_get() result is cast back to
 * int rather than dereferenced.
 *
 * NOTE(review): casting a pointer straight to int narrows on platforms
 * where pointers are wider than int and draws a compiler warning; going
 * through intptr_t would be cleaner. A term missing from the table
 * presumably yields 0 (NULL from h_get) -- confirm.
 */
+ static int cdfsea_doc_freq(Searcher *self, Term *term)
644
+ {
645
+ CachedDFSearcher *cdfsea = (CachedDFSearcher *)self->data;
646
+ return (int)h_get(cdfsea->df_map, term);
647
+ }
648
+
649
/*
 * cdfsea_get_doc: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The trailing `return NULL` is presumably only there
 * to satisfy the compiler (assumes RAISE does not return -- confirm).
 */
+ static Document *cdfsea_get_doc(Searcher *self, int doc_num)
650
+ {
651
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
652
+ return NULL;
653
+ }
654
+
655
/* cdfsea_max_doc: return the max_doc value captured by cdfsea_create(). */
+ static int cdfsea_max_doc(Searcher *self)
656
+ {
657
+ return ((CachedDFSearcher *)self->data)->max_doc;
658
+ }
659
+
660
/*
 * cdfsea_create_weight: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The `return NULL` presumably only satisfies the
 * compiler (assumes RAISE does not return -- confirm).
 */
+ static Weight *cdfsea_create_weight(Searcher *self, Query *query)
661
+ {
662
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
663
+ return NULL;
664
+ }
665
+
666
/*
 * cdfsea_search: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The `return NULL` presumably only satisfies the
 * compiler (assumes RAISE does not return -- confirm).
 */
+ static TopDocs *cdfsea_search(Searcher *self, Query *query, int first_doc,
667
+ int num_docs, Filter *filter, Sort *sort)
668
+ {
669
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
670
+ return NULL;
671
+ }
672
+
673
/*
 * cdfsea_search_each: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR without touching any argument.
 */
+ static void cdfsea_search_each(Searcher *self, Query *query, Filter *filter,
674
+ void (*fn)(Searcher *, int, float, void *), void *arg)
675
+ {
676
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
677
+ }
678
+
679
/*
 * cdfsea_search_each_w: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR without touching any argument.
 */
+ static void cdfsea_search_each_w(Searcher *self, Weight *w, Filter *filter,
680
+ void (*fn)(Searcher *, int, float, void *), void *arg)
681
+ {
682
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
683
+ }
684
+
685
/*
 * cdfsea_rewrite: performs no rewriting. Returns `original` itself after
 * bumping its ref_cnt, so the caller receives its own reference to the
 * query.
 */
+ static Query *cdfsea_rewrite(Searcher *self, Query *original)
686
+ {
687
+ original->ref_cnt++;
688
+ return original;
689
+ }
690
+
691
/*
 * cdfsea_explain: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The `return NULL` presumably only satisfies the
 * compiler (assumes RAISE does not return -- confirm).
 */
+ static Explanation *cdfsea_explain(Searcher *self, Query *query, int doc_num)
692
+ {
693
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
694
+ return NULL;
695
+ }
696
+
697
/*
 * cdfsea_explain_w: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The `return NULL` presumably only satisfies the
 * compiler (assumes RAISE does not return -- confirm).
 */
+ static Explanation *cdfsea_explain_w(Searcher *self, Weight *w, int doc_num)
698
+ {
699
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
700
+ return NULL;
701
+ }
702
+
703
/*
 * cdfsea_get_similarity: not supported on a CachedDFSearcher; raises
 * UNSUPPORTED_ERROR. The `return NULL` presumably only satisfies the
 * compiler (assumes RAISE does not return -- confirm).
 */
+ static Similarity *cdfsea_get_similarity(Searcher *self)
704
+ {
705
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
706
+ return NULL;
707
+ }
708
+
709
/*
 * cdfsea_close: release a searcher made by cdfsea_create().
 * Destroys the df_map that was handed to cdfsea_create() (so the searcher
 * owns that table), then frees the private data and the Searcher itself.
 */
+ static void cdfsea_close(Searcher *self)
710
+ {
711
+ CachedDFSearcher *cdfsea = (CachedDFSearcher *)self->data;
712
+ h_destroy(cdfsea->df_map);
713
+ free(cdfsea);
714
+ free(self);
715
+ }
716
+
717
/*
 * cdfsea_create: build a Searcher that serves document frequencies from
 * a precomputed table.
 *
 *   df_map  - Term -> frequency table; stored as-is and later destroyed
 *             by cdfsea_close(), so ownership passes to the searcher.
 *   max_doc - value reported by the max_doc() method.
 *
 * Returns a newly allocated Searcher whose method table points at the
 * cdfsea_* functions (doc_freqs uses ss_doc_freqs, defined elsewhere).
 *
 * NOTE(review): self->ir and self->similarity are never assigned here;
 * unless ALLOC zero-fills they are indeterminate -- confirm nothing reads
 * them on this searcher type.
 */
+ Searcher *cdfsea_create(HshTable *df_map, int max_doc)
718
+ {
719
+ Searcher *self = ALLOC(Searcher);
720
+
721
+ CachedDFSearcher *cdfsea = ALLOC(CachedDFSearcher);
722
+
723
+ cdfsea->df_map = df_map;
724
+ cdfsea->max_doc = max_doc;
725
+ self->data = cdfsea;
726
+
727
+ self->doc_freq = &cdfsea_doc_freq;
728
+ self->doc_freqs = &ss_doc_freqs;
729
+ self->get_doc = &cdfsea_get_doc;
730
+ self->max_doc = &cdfsea_max_doc;
731
+ self->create_weight = &cdfsea_create_weight;
732
+ self->search = &cdfsea_search;
733
+ self->search_each = &cdfsea_search_each;
734
+ self->search_each_w = &cdfsea_search_each_w;
735
+ self->rewrite = &cdfsea_rewrite;
736
+ self->explain = &cdfsea_explain;
737
+ self->explain_w = &cdfsea_explain_w;
738
+ self->get_similarity = &cdfsea_get_similarity;
739
+ self->close = &cdfsea_close;
515
740
  return self;
516
741
  }
517
742
 
743
+ /***************************************************************************
744
+ *
745
+ * MultiSearcher
746
+ *
747
+ ***************************************************************************/
748
+
749
/*
 * msea_get_searcher_index: find which sub-searcher owns global doc
 * number `n`.
 *
 * Binary-searches msea->starts[0 .. s_cnt-1]. On an exact hit it walks
 * forward over any following entries with the same start value and
 * returns the last of them (msea_create() produces equal consecutive
 * starts when a sub-searcher reports max_doc == 0). Otherwise it returns
 * `hi`, which at loop exit indexes the largest start that is below `n`.
 *
 * NOTE(review): returns -1 when `n` is below starts[0]; callers index
 * arrays with the result without checking -- confirm `n` is validated
 * upstream.
 */
+ static inline int msea_get_searcher_index(Searcher *self, int n)
750
+ {
751
+ MultiSearcher *msea = (MultiSearcher *)self->data;
752
+ int lo = 0; /* search starts array */
753
+ int hi = msea->s_cnt - 1; /* for 1st element < n, return its index */
754
+ int mid, mid_val;
755
+
756
+ while (hi >= lo) {
757
+ mid = (lo + hi) >> 1;
758
+ mid_val = msea->starts[mid];
759
+ if (n < mid_val) {
760
+ hi = mid - 1;
761
+ } else if (n > mid_val) {
762
+ lo = mid + 1;
763
+ } else { /* found a match */
764
+ while (((mid+1) < msea->s_cnt) && (msea->starts[mid+1] == mid_val)) {
765
+ mid++; /* scan to last match */
766
+ }
767
+ return mid;
768
+ }
769
+ }
770
+ return hi;
771
+ }
772
+
773
/*
 * msea_doc_freq: document frequency of `term` across the whole
 * MultiSearcher, computed as the sum of doc_freq() over every
 * sub-searcher.
 */
+ static int msea_doc_freq(Searcher *self, Term *term)
774
+ {
775
+ int i;
776
+ int doc_freq = 0;
777
+ Searcher *s;
778
+ MultiSearcher *msea = (MultiSearcher *)self->data;
779
+ for (i = 0; i < msea->s_cnt; i++) {
780
+ s = msea->searchers[i];
781
+ doc_freq += s->doc_freq(s, term);
782
+ }
783
+
784
+ return doc_freq;
785
+ }
786
+
787
/*
 * msea_get_doc: fetch the document with global number `doc_num`.
 * Locates the owning sub-searcher, then asks it for the document using
 * the local number (doc_num minus that sub-searcher's start offset).
 */
+ static Document *msea_get_doc(Searcher *self, int doc_num)
788
+ {
789
+ MultiSearcher *msea = (MultiSearcher *)self->data;
790
+ int i = msea_get_searcher_index(self, doc_num);
791
+ Searcher *s = msea->searchers[i];
792
+ return s->get_doc(s, doc_num - msea->starts[i]);
793
+ }
794
+
795
/*
 * msea_max_doc: return the combined max_doc that msea_create() computed
 * by summing max_doc() over the sub-searchers.
 */
+ static int msea_max_doc(Searcher *self)
796
+ {
797
+ return ((MultiSearcher *)self->data)->max_doc;
798
+ }
799
+
800
/*
 * msea_create_weight: build a Weight for `query` using document
 * frequencies aggregated over all sub-searchers.
 *
 * Steps:
 *   1. rewrite the query through self->rewrite();
 *   2. collect its terms and fetch their frequencies via
 *      self->doc_freqs();
 *   3. load them into a Term -> frequency hash table;
 *   4. wrap that table in a temporary CachedDFSearcher and create the
 *      Weight against it with q_weight();
 *   5. release the rewritten query and close the temporary searcher
 *      (which destroys the table).
 *
 * NOTE(review): cdfsea is closed before the Weight is returned, so the
 * Weight must not keep a pointer to it -- confirm in q_weight / the
 * Weight implementations.
 */
+ static Weight *msea_create_weight(Searcher *self, Query *query)
801
+ {
802
+ int i, *dfs;
803
+ Searcher *cdfsea;
804
+ Weight *w;
/* both free functions are NULL: presumably the table then frees neither
 * keys nor values -- confirm against h_new */
805
+ HshTable *df_map = h_new((hash_ft)&term_hash, (eq_ft)&term_eq,
806
+ (free_ft)NULL, (free_ft)NULL);
807
+ Query *rq = self->rewrite(self, query);
808
+ HashSet *terms = term_set_create();
809
+ rq->extract_terms(rq, terms);
810
+ dfs = self->doc_freqs(self, (Term **)terms->elems, terms->size);
811
+
812
+ for (i = 0; i < terms->size; i++) {
/* the int count is packed into the pointer-sized value slot and read
 * back with an (int) cast in cdfsea_doc_freq().
 * NOTE(review): int-to-pointer cast of a different width warns on
 * 64-bit targets; intptr_t would be cleaner. */
813
+ h_set(df_map, terms->elems[i], (void *)dfs[i]);
814
+ }
815
+ /* don't destroy the individual terms, only the HashSet */
816
+ hs_destroy(terms);
817
+ free(dfs);
818
+
819
+ cdfsea = cdfsea_create(df_map, ((MultiSearcher *)self->data)->max_doc);
820
+
821
+ w = q_weight(rq, cdfsea);
822
+ q_deref(rq);
823
+ cdfsea->close(cdfsea);
824
+
825
+ return w;
826
+ }
827
+
828
/*
 * Context passed through a sub-searcher's search_each_w() to
 * msea_search_each_i(), which uses it to call the caller's real callback
 * with a global document number.
 */
+ struct MultiSearchEachArg {
829
/* doc-number offset of the sub-searcher currently being searched */
+ int start;
830
/* caller's opaque argument, forwarded unchanged to fn */
+ void *arg;
831
/* caller's per-hit callback */
+ void (*fn)(Searcher *, int, float, void *);
832
+ };
833
+
834
/*
 * msea_search_each_i: per-hit trampoline used by msea_search_each_w().
 * `arg` must point at a struct MultiSearchEachArg; the sub-searcher's
 * local `doc_num` is shifted by its `start` offset and the wrapped
 * callback is invoked with the wrapped argument.
 *
 * NOTE(review): `self` here is whatever the sub-searcher passes (the
 * sub-searcher, not the MultiSearcher) -- confirm callbacks expect that.
 * NOTE(review): not declared static although it is only referenced from
 * this file's msea_search_each_w() in the visible code.
 */
+ void msea_search_each_i(Searcher *self, int doc_num, float score, void *arg)
835
+ {
836
+ struct MultiSearchEachArg *mse_arg = (struct MultiSearchEachArg *)arg;
837
+
838
+ mse_arg->fn(self, doc_num + mse_arg->start, score, mse_arg->arg);
839
+ }
840
+
841
/*
 * msea_search_each_w: run an existing Weight on every sub-searcher in
 * order, reporting each hit to `fn` with a global document number.
 *
 * The same `w` and `filter` are handed to each sub-searcher's
 * search_each_w(); hits are routed through msea_search_each_i(), which
 * adds the sub-searcher's start offset before calling `fn(.., arg)`.
 * The Weight is not destroyed here.
 */
+ static void msea_search_each_w(Searcher *self, Weight *w, Filter *filter,
842
+ void (*fn)(Searcher *, int, float, void *), void *arg)
843
+ {
844
+ int i;
845
+ struct MultiSearchEachArg mse_arg;
846
+ MultiSearcher *msea = (MultiSearcher *)self->data;
847
+ Searcher *s;
848
+
849
+ mse_arg.fn = fn;
850
+ mse_arg.arg = arg;
851
+ for (i = 0; i < msea->s_cnt; i++) {
852
+ s = msea->searchers[i];
/* only the offset changes between sub-searchers */
853
+ mse_arg.start = msea->starts[i];
854
+ s->search_each_w(s, w, filter, &msea_search_each_i, &mse_arg);
855
+ }
856
+ }
518
857
 
858
/*
 * msea_search_each: Query-based front end to msea_search_each_w().
 * Creates a Weight with q_weight(query, self), runs it over all
 * sub-searchers, then destroys the Weight.
 */
+ static void msea_search_each(Searcher *self, Query *query, Filter *filter,
859
+ void (*fn)(Searcher *, int, float, void *), void *arg)
860
+ {
861
+ Weight *w = q_weight(query, self);
862
+ msea_search_each_w(self, w, filter, fn, arg);
863
+ w->destroy(w);
864
+ }
865
+
866
/*
 * Accumulator shared between msea_search() and its per-hit callback
 * msea_search_i().
 *
 * NOTE(review): max_size and min_score are filled in by msea_search()
 * but msea_search_i() never reads them.
 */
+ struct MultiSearchArg {
867
/* total_hits: incremented once per reported hit */
+ int total_hits, max_size;
868
+ float min_score;
869
/* queue collecting the hits */
+ PriorityQueue *hq;
870
/* insert routine matching the kind of queue in hq */
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
871
+ };
872
+
873
/*
 * msea_search_i: per-hit callback used by msea_search().
 * `arg` must point at a struct MultiSearchArg. Counts the hit and offers
 * it to the priority queue through the stored hq_insert function.
 *
 * NOTE(review): `hit` lives on this function's stack and is passed by
 * address; this relies on hq_insert copying the Hit rather than keeping
 * the pointer -- confirm for hit_pq_insert and fshq_pq_insert.
 * NOTE(review): not declared static although only msea_search() uses it
 * in the visible code.
 */
+ void msea_search_i(Searcher *self, int doc_num, float score, void *arg)
874
+ {
875
+ struct MultiSearchArg *ms_arg = (struct MultiSearchArg *)arg;
876
+ Hit hit;
877
+
878
+ ms_arg->total_hits++;
879
+ hit.doc = doc_num;
880
+ hit.score = score;
881
+ ms_arg->hq_insert(ms_arg->hq, &hit);
882
+ }
883
+
884
/*
 * msea_search: search all sub-searchers and return one page of results.
 *
 *   first_doc - number of leading hits to skip; must be >= 0.
 *   num_docs  - page size; must be > 0.
 *   filter    - optional; forwarded to every sub-searcher.
 *   sort      - optional; selects the field-sorted queue (fshq_*) instead
 *               of the plain score-ordered queue.
 *
 * Raises ARG_ERROR on an invalid first_doc / num_docs. Otherwise returns
 * td_create(total hit count, number of hits in the page, hit array); the
 * array is NULL and the count 0 when fewer than first_doc + 1 hits were
 * queued.
 *
 * NOTE(review): msea_create() sets self->ir to NULL, yet it is passed to
 * filter->get_bv() and fshq_pq_create() below -- confirm both tolerate a
 * NULL reader. `bits` is computed and destroyed but never consulted; the
 * filter itself is applied by the sub-searchers.
 */
+ static TopDocs *msea_search(Searcher *self, Query *query, int first_doc,
885
+ int num_docs, Filter *filter, Sort *sort)
886
+ {
/* the queue must hold the skipped hits as well as the requested page */
887
+ int max_size = first_doc + num_docs;
888
+ int i;
889
+ Weight *weight;
890
+ Hit **score_docs = NULL;
891
+ BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
892
+ Hit *(*hq_pop)(PriorityQueue *pq);
893
+ void (*hq_insert)(PriorityQueue *pq, Hit *hit);
894
+ void (*hq_destroy)(PriorityQueue *self);
895
+ PriorityQueue *hq;
896
+ struct MultiSearchArg ms_arg;
897
+
898
+
899
+ if (num_docs <= 0)
900
+ RAISE(ARG_ERROR, NUM_DOCS_ARG_ERROR_MSG);
901
+
902
+ if (first_doc < 0)
903
+ RAISE(ARG_ERROR, FIRST_DOC_ARG_ERROR_MSG);
904
+
905
+ weight = q_weight(query, self);
/* pick the queue implementation once; the rest of the function only
 * goes through the hq_* function pointers */
906
+ if (sort) {
907
+ hq = fshq_pq_create(max_size, sort, self->ir);
908
+ hq_pop = &fshq_pq_pop;
909
+ hq_insert = &fshq_pq_insert;
910
+ hq_destroy = &fshq_pq_destroy;
911
+ } else {
912
+ hq = pq_create(max_size, &hit_less_than);
913
+ hq_pop = &hit_pq_pop;
914
+ hq_insert = &hit_pq_insert;
915
+ hq_destroy = &pq_destroy;
916
+ }
917
+
918
+
919
+ ms_arg.hq = hq;
920
+ ms_arg.total_hits = 0;
921
+ ms_arg.max_size = max_size;
922
+ ms_arg.min_score = 0.0;
923
+ ms_arg.hq_insert = hq_insert;
924
+
925
+ msea_search_each_w(self, weight, filter, msea_search_i, &ms_arg);
926
+
927
+ weight->destroy(weight);
928
+
929
+ if (hq->count > first_doc) {
/* shrink the page when fewer hits remain after skipping first_doc */
930
+ if ((hq->count - first_doc) < num_docs) {
931
+ num_docs = hq->count - first_doc;
932
+ }
933
+ score_docs = ALLOC_N(Hit *, num_docs);
/* the array is filled from its last slot to its first, so the hit
 * popped first ends up last in the page */
934
+ for (i = num_docs - 1; i >= 0; i--) {
935
+ score_docs[i] = hq_pop(hq);
936
+ //hit = score_docs[i] = pq_pop(hq);
937
+ //printf("hit = %d-->%f\n", hit->doc, hit->score);
938
+ }
939
+ } else {
940
+ num_docs = 0;
941
+ }
/* whatever is still queued (the skipped first_doc hits) is discarded */
942
+ pq_clear(hq);
943
+ hq_destroy(hq);
944
+
945
+ if (bits) bv_destroy(bits);
946
+ return td_create(ms_arg.total_hits, num_docs, score_docs);
947
+ }
948
+
949
/*
 * msea_rewrite: rewrite `original` for the whole MultiSearcher.
 *
 * Each sub-searcher rewrites the query independently; the per-searcher
 * results are merged with q_combine() into the query that is returned.
 * The individual rewritten queries are then dereferenced and the
 * temporary array freed.
 */
+ static Query *msea_rewrite(Searcher *self, Query *original)
950
+ {
951
+ int i;
952
+ Searcher *s;
953
+ MultiSearcher *msea = (MultiSearcher *)self->data;
954
+ Query **queries = ALLOC_N(Query *, msea->s_cnt), *rewritten;
955
+
956
+ for (i = 0; i < msea->s_cnt; i++) {
957
+ s = msea->searchers[i];
958
+ queries[i] = s->rewrite(s, original);
959
+ }
960
+ rewritten = q_combine(queries, msea->s_cnt);
961
+
/* drop the references obtained from the per-searcher rewrites;
 * presumably q_combine keeps its own where it needs them -- confirm */
962
+ for (i = 0; i < msea->s_cnt; i++) {
963
+ q_deref(queries[i]);
964
+ }
965
+ free(queries);
966
+ return rewritten;
967
+ }
968
+
969
/*
 * msea_explain: explain how `query` scores global document `doc_num`.
 *
 * The Weight is created against the MultiSearcher itself (q_weight with
 * `self`), while the explanation is produced by the sub-searcher that
 * owns the document, using its local document number. The Weight is
 * destroyed before returning.
 */
+ static Explanation *msea_explain(Searcher *self, Query *query, int doc_num)
970
+ {
971
+ MultiSearcher *msea = (MultiSearcher *)self->data;
972
+ int i = msea_get_searcher_index(self, doc_num);
973
+ Weight *w = q_weight(query, self);
974
+ Searcher *s = msea->searchers[i];
975
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
976
+ w->destroy(w);
977
+ return e;
978
+ }
979
+
980
/*
 * msea_explain_w: like msea_explain() but with a caller-owned Weight.
 * Delegates to explain_w() of the sub-searcher owning `doc_num`, passing
 * the document number relative to that sub-searcher's start offset.
 * The Weight is not destroyed here.
 */
+ static Explanation *msea_explain_w(Searcher *self, Weight *w, int doc_num)
981
+ {
982
+ MultiSearcher *msea = (MultiSearcher *)self->data;
983
+ int i = msea_get_searcher_index(self, doc_num);
984
+ Searcher *s = msea->searchers[i];
985
+ Explanation *e = s->explain_w(s, w, doc_num - msea->starts[i]);
986
+ return e;
987
+ }
988
+
989
/*
 * msea_get_similarity: accessor returning the MultiSearcher's own
 * `similarity` field (set by msea_create() from sim_create_default()).
 */
+ static Similarity *msea_get_similarity(Searcher *self)
990
+ {
991
+ return self->similarity;
992
+ }
993
+
994
/*
 * msea_close: release a MultiSearcher.
 *
 * When close_subs is set, every sub-searcher is closed and the searchers
 * array is freed; otherwise both are left to the caller. The starts
 * array, the private data and the Searcher itself are always freed.
 *
 * NOTE(review): self->similarity, created in msea_create() via
 * sim_create_default(), is not released here -- confirm whether that
 * object needs destroying or is shared.
 */
+ static void msea_close(Searcher *self)
995
+ {
996
+ int i;
997
+ Searcher *s;
998
+ MultiSearcher *msea = (MultiSearcher *)self->data;
999
+ if (msea->close_subs) {
1000
+ for (i = 0; i < msea->s_cnt; i++) {
1001
+ s = msea->searchers[i];
1002
+ s->close(s);
1003
+ }
1004
+ free(msea->searchers);
1005
+ }
1006
+ free(msea->starts);
1007
+ free(msea);
1008
+ free(self);
1009
+ }
1010
+
1011
/*
 * msea_create: build a Searcher that fans out over several
 * sub-searchers.
 *
 *   searchers  - array of s_cnt sub-searchers; the pointer is stored,
 *                not copied.
 *   s_cnt      - number of entries in `searchers`.
 *   close_subs - when true, msea_close() also closes the sub-searchers
 *                and frees the `searchers` array.
 *
 * Document numbers are made global by giving each sub-searcher a start
 * offset equal to the summed max_doc() of the searchers before it.
 * Returns the newly allocated Searcher with its method table pointing at
 * the msea_* functions (doc_freqs uses ss_doc_freqs, defined elsewhere).
 */
+ Searcher *msea_create(Searcher **searchers, int s_cnt, bool close_subs)
1012
+ {
1013
+ int i, max_doc = 0, *starts;
1014
+ Searcher *self = ALLOC(Searcher);
1015
+
1016
+ MultiSearcher *msea = ALLOC(MultiSearcher);
1017
+
/* one extra slot: starts[s_cnt] holds the grand total */
1018
+ starts = ALLOC_N(int, s_cnt + 1);
1019
+ for (i = 0; i < s_cnt; i++) {
1020
+ starts[i] = max_doc;
1021
+ max_doc += searchers[i]->max_doc(searchers[i]);
1022
+ }
1023
+ starts[i] = max_doc;
1024
+
1025
+ msea->s_cnt = s_cnt;
1026
+ msea->searchers = searchers;
1027
+ msea->starts = starts;
1028
+ msea->max_doc = max_doc;
1029
+ msea->close_subs = close_subs;
1030
+ self->data = msea;
1031
+
/* a MultiSearcher has no IndexReader of its own */
1032
+ self->ir = (IndexReader *)NULL;
1033
+ self->similarity = sim_create_default();
1034
+ self->doc_freq = &msea_doc_freq;
1035
+ self->doc_freqs = &ss_doc_freqs;
1036
+ self->get_doc = &msea_get_doc;
1037
+ self->max_doc = &msea_max_doc;
1038
+ self->create_weight = &msea_create_weight;
1039
+ self->search = &msea_search;
1040
+ self->search_each = &msea_search_each;
1041
+ self->search_each_w = &msea_search_each_w;
1042
+ self->rewrite = &msea_rewrite;
1043
+ self->explain = &msea_explain;
1044
+ self->explain_w = &msea_explain_w;
1045
+ self->get_similarity = &msea_get_similarity;
1046
+ self->close = &msea_close;
1047
+ return self;
1048
+ }