ferret 0.3.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/ext/ram_store.c
ADDED
@@ -0,0 +1,447 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <store.h>
|
3
|
+
|
4
|
+
typedef struct RamFile {
|
5
|
+
char *name;
|
6
|
+
uchar **buffers;
|
7
|
+
int bufcnt;
|
8
|
+
int len;
|
9
|
+
int refcnt;
|
10
|
+
bool alive;
|
11
|
+
} RamFile;
|
12
|
+
|
13
|
+
RamFile *rf_create(const char *name)
|
14
|
+
{
|
15
|
+
RamFile *rf = ALLOC(RamFile);
|
16
|
+
rf->buffers = ALLOC(uchar *);
|
17
|
+
rf->buffers[0] = ALLOC_N(uchar, BUFFER_SIZE);
|
18
|
+
rf->name = estrdup(name);
|
19
|
+
rf->len = 0;
|
20
|
+
rf->bufcnt = 1;
|
21
|
+
rf->refcnt = 0;
|
22
|
+
rf->alive = true;
|
23
|
+
return rf;
|
24
|
+
}
|
25
|
+
|
26
|
+
/*
 * Grow the chunk list of +rf+ until chunk index +buf_num+ exists.
 * Chunks are appended one at a time; existing chunks are left untouched.
 */
void rf_extend_if_necessary(RamFile *rf, int buf_num)
{
    for (; rf->bufcnt <= buf_num; rf->bufcnt++) {
        REALLOC_N(rf->buffers, uchar *, (rf->bufcnt + 1));
        rf->buffers[rf->bufcnt] = ALLOC_N(uchar, BUFFER_SIZE);
    }
}
|
33
|
+
|
34
|
+
void rf_close(void *p)
|
35
|
+
{
|
36
|
+
int i;
|
37
|
+
RamFile *rf = (RamFile *)p;
|
38
|
+
if (rf->refcnt > 0 || rf->alive) return;
|
39
|
+
free(rf->name);
|
40
|
+
for (i = 0; i < rf->bufcnt; i++) {
|
41
|
+
free(rf->buffers[i]);
|
42
|
+
}
|
43
|
+
free(rf->buffers);
|
44
|
+
free(rf);
|
45
|
+
}
|
46
|
+
|
47
|
+
/*
 * Make sure a file called +filename+ exists in +store+, creating an empty
 * one when it is missing. An existing file is left untouched.
 *
 * The table entry is keyed by the RamFile's own copy of the name rather
 * than by the caller's +filename+ pointer, so the key stays valid after the
 * caller releases its string (for example a Lock name). This matches what
 * ram_create_output and ram_rename do.
 */
void ram_touch(Store *store, char *filename)
{
    if (h_get(store->dir.ht, filename) == NULL) {
        RamFile *rf = rf_create(filename);
        h_set(store->dir.ht, rf->name, rf);
    }
}
|
52
|
+
|
53
|
+
/*
 * Return true when +store+ holds an entry for +filename+, false otherwise.
 */
int ram_exists(Store *store, char *filename)
{
    return (h_get(store->dir.ht, filename) != NULL) ? true : false;
}
|
60
|
+
|
61
|
+
/*
 * Detach +filename+ from +store+.
 *
 * The file is marked dead and handed to rf_close, which only frees it once
 * no stream references it. Returns true when an entry was removed and
 * false when no such file existed.
 */
int ram_remove(Store *store, char *filename)
{
    RamFile *removed = h_rem(store->dir.ht, filename, false);

    if (removed == NULL) {
        return false;
    }
    removed->alive = false;
    rf_close(removed);
    return true;
}
|
72
|
+
|
73
|
+
/*
 * Rename the file +from+ to +to+ inside +store+; always returns true.
 *
 * The entry is taken out of the table, given a fresh copy of the new name
 * and stored again under that name. A file already stored under +to+ is
 * marked dead before being replaced.
 *
 * NOTE(review): a missing +from+ is reported through eprintf and the code
 * then dereferences the NULL result, so this relies on eprintf not
 * returning - confirm.
 */
int ram_rename(Store *store, char *from, char *to)
{
    RamFile *moved = (RamFile *)h_rem(store->dir.ht, from, false);
    RamFile *clobbered;

    if (moved == NULL) {
        eprintf(IO_ERROR, "tried to rename a file that doesn't exist");
    }

    free(moved->name);
    moved->name = estrdup(to);

    /* mark the file we are about to overwrite as dead */
    clobbered = (RamFile *)h_get(store->dir.ht, to);
    if (clobbered != NULL) {
        clobbered->alive = false;
    }

    h_set(store->dir.ht, moved->name, moved);
    return true;
}
|
91
|
+
|
92
|
+
/*
 * Number of entries currently used in the store's file table.
 */
int ram_count(Store *store)
{
    HshTable *table = store->dir.ht;
    return table->used;
}
|
96
|
+
|
97
|
+
/*
 * Call +func+(name, +arg+) for every file in +store+ whose name does not
 * begin with LOCK_PREFIX. Files are visited in table-slot order.
 */
void ram_each(Store *store, void (*func)(char *fname, void *arg), void *arg)
{
    HshTable *table = store->dir.ht;
    int slot = 0;

    while (slot <= table->mask) {
        RamFile *file = (RamFile *)table->table[slot].value;
        slot++;

        if (file == NULL) {
            continue;               /* empty slot */
        }
        if (strncmp(file->name, LOCK_PREFIX, strlen(LOCK_PREFIX)) != 0) {
            func(file->name, arg);  /* lock files are skipped */
        }
    }
}
|
111
|
+
|
112
|
+
/*
 * Shut the store down: mark every file dead, then destroy the file table
 * and the store itself.
 */
void ram_close(Store *store)
{
    HshTable *table = store->dir.ht;
    int slot;

    for (slot = 0; slot <= table->mask; slot++) {
        RamFile *file = (RamFile *)table->table[slot].value;
        if (file != NULL) {
            file->alive = false;
        }
    }

    h_destroy(store->dir.ht);
    store_destroy(store);
}
|
124
|
+
|
125
|
+
/*
|
126
|
+
* Be sure to keep the locks
|
127
|
+
*/
|
128
|
+
void ram_clear(Store *store)
|
129
|
+
{
|
130
|
+
int i;
|
131
|
+
HshTable *ht = store->dir.ht;
|
132
|
+
RamFile *rf;
|
133
|
+
for (i = 0; i <= ht->mask; i++) {
|
134
|
+
rf = (RamFile *)ht->table[i].value;
|
135
|
+
if (rf && !file_is_lock(rf->name)) {
|
136
|
+
rf->alive = false;
|
137
|
+
h_del(ht, rf->name);
|
138
|
+
}
|
139
|
+
}
|
140
|
+
}
|
141
|
+
|
142
|
+
/*
 * Delete only the lock files from +store+ (those for which file_is_lock
 * reports true); all other files are kept.
 */
void ram_clear_locks(Store *store)
{
    HshTable *table = store->dir.ht;
    int slot;

    for (slot = 0; slot <= table->mask; slot++) {
        RamFile *file = (RamFile *)table->table[slot].value;

        if (file == NULL || !file_is_lock(file->name)) {
            continue;   /* empty slot, or an ordinary file */
        }
        file->alive = false;
        h_del(table, file->name);
    }
}
|
155
|
+
/*
 * Delete every file in +store+, lock files included. Each file is marked
 * dead before its table entry is deleted.
 */
void ram_clear_all(Store *store)
{
    HshTable *table = store->dir.ht;
    int slot;

    for (slot = 0; slot <= table->mask; slot++) {
        RamFile *file = (RamFile *)table->table[slot].value;

        if (file == NULL) {
            continue;
        }
        file->alive = false;
        h_del(table, file->name);
    }
}
|
168
|
+
|
169
|
+
/*
 * Length in bytes of the file +filename+, or 0 when the store has no such
 * file.
 */
int ram_length(Store *store, char *filename)
{
    RamFile *file = (RamFile *)h_get(store->dir.ht, filename);

    return (file == NULL) ? 0 : file->len;
}
|
177
|
+
|
178
|
+
/*
 * Length in bytes of the RamFile behind the output stream +os+.
 */
int ramo_length(OutStream *os)
{
    RamFile *file = (RamFile *)os->file;
    return file->len;
}
|
182
|
+
|
183
|
+
/*
 * Copy +len+ bytes from +src+ into the RamFile behind +os+, starting at the
 * stream's current pointer, then advance the pointer and grow the file
 * length if the write went past the previous end.
 *
 * The data may span any number of BUFFER_SIZE chunks; missing chunks are
 * allocated on demand. The previous version handled at most two chunks, so
 * a +len+ larger than the room left in the current chunk plus one full
 * chunk would have overrun the second chunk.
 */
void ramo_flush_internal(OutStream *os, uchar *src, int len)
{
    RamFile *rf = (RamFile *)os->file;
    int pointer = os->pointer;
    int buffer_number = (int)(pointer / BUFFER_SIZE);
    int buffer_offset = pointer % BUFFER_SIZE;
    int remaining = len;

    /* The chunk under the write position is created even for an empty
     * flush, exactly as before, so the chunk list never lags the pointer. */
    rf_extend_if_necessary(rf, buffer_number);

    while (remaining > 0) {
        int bytes_in_buffer = BUFFER_SIZE - buffer_offset;
        int bytes_to_copy = bytes_in_buffer < remaining ? bytes_in_buffer
                                                        : remaining;

        rf_extend_if_necessary(rf, buffer_number);
        memcpy(rf->buffers[buffer_number] + buffer_offset, src, bytes_to_copy);

        src += bytes_to_copy;
        remaining -= bytes_to_copy;
        buffer_number++;        /* continue at the start of the next chunk */
        buffer_offset = 0;
    }

    os->pointer += len;
    if (os->pointer > rf->len) {
        rf->len = os->pointer;
    }
}
|
214
|
+
|
215
|
+
/*
 * Move the write position of +os+ to +pos+. Only the stream pointer is
 * changed; the underlying RamFile is not touched.
 */
void ramo_seek_internal(OutStream *os, int pos)
{
    os->pointer = pos;
}
|
219
|
+
|
220
|
+
/*
 * Rewind +os+ to position 0 and set the length of its RamFile back to 0.
 * The chunks already allocated are kept.
 */
void ramo_reset(OutStream *os)
{
    RamFile *file = (RamFile *)os->file;

    os_seek(os, 0);
    file->len = 0;
}
|
226
|
+
|
227
|
+
/*
 * Drop this stream's reference on its RamFile and let rf_close free the
 * file if nothing else refers to it.
 */
void ramo_close_internal(OutStream *os)
{
    RamFile *file = (RamFile *)os->file;

    file->refcnt -= 1;
    rf_close(file);
}
|
233
|
+
|
234
|
+
/*
 * Write the whole content of the RamFile behind +os+ to +other_o+.
 *
 * +os+ is flushed first so that the file length is up to date. Full chunks
 * are written in order, followed by the partial last chunk if there is
 * one. When the length is an exact multiple of BUFFER_SIZE there is no
 * partial chunk; the previous version still indexed
 * rf->buffers[len / BUFFER_SIZE] for a zero-byte write, which can lie past
 * the end of the chunk list.
 */
void ramo_write_to(OutStream *os, OutStream *other_o)
{
    RamFile *rf = (RamFile *)os->file;
    int full_buffers, tail_len, i;

    os_flush(os);

    full_buffers = (int)(rf->len / BUFFER_SIZE);
    tail_len = rf->len % BUFFER_SIZE;

    for (i = 0; i < full_buffers; i++) {
        os_write_bytes(other_o, rf->buffers[i], BUFFER_SIZE);
    }
    if (tail_len > 0) {
        os_write_bytes(other_o, rf->buffers[full_buffers], tail_len);
    }
}
|
246
|
+
|
247
|
+
OutStream *ram_create_buffer()
|
248
|
+
{
|
249
|
+
RamFile *rf = rf_create("");
|
250
|
+
rf->alive = false;
|
251
|
+
OutStream *os = os_create();
|
252
|
+
os->file = rf;
|
253
|
+
os->pointer = 0;
|
254
|
+
os->flush_internal = &ramo_flush_internal;
|
255
|
+
os->seek_internal = &ramo_seek_internal;
|
256
|
+
os->close_internal = &ramo_close_internal;
|
257
|
+
return os;
|
258
|
+
}
|
259
|
+
|
260
|
+
/*
 * Dispose of a stream made by ram_create_buffer: hand its RamFile to
 * rf_close, then free the stream struct.
 */
void ram_destroy_buffer(OutStream *os)
{
    void *backing = os->file;

    rf_close(backing);
    free(os);
}
|
265
|
+
|
266
|
+
/*
 * Open an output stream on +filename+ in +store+, creating the file when it
 * does not exist yet. The stream starts at position 0 and holds one
 * reference on the file.
 */
OutStream *ram_create_output(Store *store, const char *filename)
{
    OutStream *stream;
    RamFile *file = (RamFile *)h_get(store->dir.ht, filename);

    if (file == NULL) {
        file = rf_create(filename);
        /* keyed by the file's own copy of the name */
        h_set(store->dir.ht, file->name, file);
    }
    file->refcnt += 1;

    stream = os_create();
    stream->file = file;
    stream->pointer = 0;
    stream->flush_internal = &ramo_flush_internal;
    stream->seek_internal = &ramo_seek_internal;
    stream->close_internal = &ramo_close_internal;
    return stream;
}
|
282
|
+
|
283
|
+
/*
 * Copy +len+ bytes from the RamFile behind +is+, starting at the stream's
 * current pointer, into +b+ beginning at index +offset+. The pointer is
 * advanced by +len+ afterwards.
 *
 * No check is made against the file length here.
 * NOTE(review): callers presumably never ask for bytes past the end -
 * confirm.
 */
void rami_read_internal(InStream *is, uchar *b, int offset, int len)
{
    RamFile *file = (RamFile *)is->file;
    int pos = is->d.pointer;
    int left;

    for (left = len; left > 0; ) {
        int chunk_index = (int)(pos / BUFFER_SIZE);
        int chunk_offset = pos % BUFFER_SIZE;
        int available = BUFFER_SIZE - chunk_offset;
        /* take what is left in this chunk, or less if that is all we need */
        int count = (available >= left) ? left : available;

        memcpy(b + offset, file->buffers[chunk_index] + chunk_offset, count);

        offset += count;
        pos += count;
        left -= count;
    }

    is->d.pointer += len;
}
|
311
|
+
|
312
|
+
/*
 * Length in bytes of the RamFile behind the input stream +is+.
 */
int rami_length(InStream *is)
{
    RamFile *file = (RamFile *)is->file;
    return file->len;
}
|
316
|
+
|
317
|
+
/*
 * Move the read position of +is+ to +pos+. Only the stream pointer is
 * changed.
 */
void rami_seek_internal(InStream *is, int pos)
{
    is->d.pointer = pos;
}
|
321
|
+
|
322
|
+
/*
 * Drop this stream's reference on its RamFile and let rf_close free the
 * file if nothing else refers to it.
 */
void rami_close_internal(InStream *is)
{
    RamFile *file = (RamFile *)is->file;

    file->refcnt -= 1;
    rf_close(file);
}
|
328
|
+
|
329
|
+
/*
 * Clone hook for RAM input streams: takes one more reference on the
 * RamFile behind +is+. +new_index_i+ is not used here.
 */
void rami_clone_internal(InStream *is, InStream *new_index_i)
{
    RamFile *shared = (RamFile *)is->file;
    shared->refcnt += 1;
}
|
333
|
+
|
334
|
+
/*
 * Open an input stream on the existing file +filename+ in +store+. The
 * stream starts at position 0, is not a clone and holds one reference on
 * the file.
 *
 * A missing file is reported through eprintf with IO_ERROR.
 * NOTE(review): the code after that dereferences the NULL result, so it
 * relies on eprintf not returning - confirm.
 */
InStream *ram_open_input(Store *store, const char *filename)
{
    InStream *stream;
    RamFile *file = (RamFile *)h_get(store->dir.ht, filename);

    if (file == NULL) {
        eprintf(IO_ERROR, "Couldn't open the ram file %s to read", filename);
    }
    file->refcnt += 1;

    stream = is_create();
    stream->file = file;
    stream->d.pointer = 0;
    stream->is_clone = false;
    stream->read_internal = &rami_read_internal;
    stream->seek_internal = &rami_seek_internal;
    stream->close_internal = &rami_close_internal;
    stream->clone_internal = &rami_clone_internal;
    stream->length_internal = &rami_length;
    return stream;
}
|
352
|
+
|
353
|
+
#define LOCK_OBTAIN_TIMEOUT 5
|
354
|
+
|
355
|
+
/*
 * Try to take +lock+. Returns true when the lock file did not exist before
 * the call and false when it was already there. In both cases the lock
 * file exists afterwards (ram_touch leaves an existing file alone).
 */
int ram_lock_obtain(Lock *lock)
{
    int obtained;

    if (ram_exists(lock->store, lock->name)) {
        obtained = false;
    } else {
        obtained = true;
    }
    ram_touch(lock->store, lock->name);
    return obtained;
}
|
363
|
+
|
364
|
+
/*
 * A RAM lock is held exactly when its lock file exists in the store.
 */
int ram_lock_is_locked(Lock *lock)
{
    Store *owner = lock->store;
    return ram_exists(owner, lock->name);
}
|
368
|
+
|
369
|
+
/*
 * Release +lock+ by removing its lock file from the store. The result of
 * ram_remove is ignored, so releasing a lock that is not held is harmless.
 */
void ram_lock_release(Lock *lock)
{
    Store *owner = lock->store;
    ram_remove(owner, lock->name);
}
|
373
|
+
|
374
|
+
/*
 * Create a Lock object for +lockname+ in +store+.
 *
 * The lock file name is LOCK_PREFIX + lockname + ".lck". It is built in a
 * heap buffer sized from the actual string lengths; the previous version
 * formatted it into a fixed 100-byte stack array with sprintf and
 * overflowed on long lock names.
 *
 * The returned Lock and its name are released with ram_close_lock.
 */
Lock *ram_open_lock(Store *store, char *lockname)
{
    Lock *lock = ALLOC(Lock);
    /* sizeof(".lck") counts the terminating NUL */
    size_t name_size = strlen(LOCK_PREFIX) + strlen(lockname) + sizeof(".lck");

    lock->name = ALLOC_N(char, name_size);
    snprintf(lock->name, name_size, "%s%s.lck", LOCK_PREFIX, lockname);

    lock->store = store;
    lock->obtain = &ram_lock_obtain;
    lock->release = &ram_lock_release;
    lock->is_locked = &ram_lock_is_locked;
    return lock;
}
|
386
|
+
|
387
|
+
/*
 * Free a Lock made by ram_open_lock, together with its name. The lock file
 * itself is not removed here.
 */
void ram_close_lock(Lock *lock)
{
    char *owned_name = lock->name;

    free(owned_name);
    free(lock);
}
|
392
|
+
|
393
|
+
|
394
|
+
Store *open_ram_store()
|
395
|
+
{
|
396
|
+
Store *new_store = store_create();
|
397
|
+
|
398
|
+
new_store->dir.ht = h_new_str(NULL, rf_close);
|
399
|
+
new_store->touch = &ram_touch;
|
400
|
+
new_store->exists = &ram_exists;
|
401
|
+
new_store->remove = &ram_remove;
|
402
|
+
new_store->rename = &ram_rename;
|
403
|
+
new_store->count = &ram_count;
|
404
|
+
new_store->close = &ram_close;
|
405
|
+
new_store->clear = &ram_clear;
|
406
|
+
new_store->clear_all = &ram_clear_all;
|
407
|
+
new_store->clear_locks = &ram_clear_locks;
|
408
|
+
new_store->length = &ram_length;
|
409
|
+
new_store->each = &ram_each;
|
410
|
+
new_store->create_output = &ram_create_output;
|
411
|
+
new_store->open_input = &ram_open_input;
|
412
|
+
new_store->open_lock = &ram_open_lock;
|
413
|
+
new_store->close_lock = &ram_close_lock;
|
414
|
+
return new_store;
|
415
|
+
}
|
416
|
+
|
417
|
+
struct CopyFileArg {
|
418
|
+
Store *to_store, *from_store;
|
419
|
+
};
|
420
|
+
|
421
|
+
static void copy_files(char *fname, void *arg)
|
422
|
+
{
|
423
|
+
struct CopyFileArg *cfa = (struct CopyFileArg *)arg;
|
424
|
+
OutStream *os = cfa->to_store->create_output(cfa->to_store, fname);
|
425
|
+
InStream *is = cfa->from_store->open_input(cfa->from_store, fname);
|
426
|
+
int len = is_length(is);
|
427
|
+
uchar buffer[len+1];
|
428
|
+
is_read_bytes(is, buffer, 0, len);
|
429
|
+
os_write_bytes(os, buffer, len);
|
430
|
+
is_close(is);
|
431
|
+
os_close(os);
|
432
|
+
}
|
433
|
+
|
434
|
+
/*
 * Create a new RAM store and copy into it every file that
 * from_store->each reports. When +close_dir+ is true, +from_store+ is
 * closed after the copy. Returns the new store.
 */
Store *open_ram_store_and_copy(Store *from_store, bool close_dir)
{
    Store *store = open_ram_store();
    struct CopyFileArg cfa;

    cfa.to_store = store;
    cfa.from_store = from_store;

    /* the callback is the address of copy_files; the text previously had
     * the "&copy" prefix mangled into a copyright sign */
    from_store->each(from_store, &copy_files, &cfa);

    if (close_dir) {
        from_store->close(from_store);
    }

    return store;
}
|
data/ext/search.c
ADDED
@@ -0,0 +1,524 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include "search.h"
|
3
|
+
|
4
|
+
/***************************************************************************
|
5
|
+
*
|
6
|
+
* Explanation
|
7
|
+
*
|
8
|
+
***************************************************************************/
|
9
|
+
|
10
|
+
Explanation *expl_create(float value, char *description)
|
11
|
+
{
|
12
|
+
Explanation *self = ALLOC(Explanation);
|
13
|
+
self->value = value;
|
14
|
+
self->description = description;
|
15
|
+
self->dcnt = 0;
|
16
|
+
self->dcapa = EXPLANATION_DETAILS_START_SIZE;
|
17
|
+
self->details = ALLOC_N(Explanation *, EXPLANATION_DETAILS_START_SIZE);
|
18
|
+
return self;
|
19
|
+
}
|
20
|
+
|
21
|
+
void expl_destoy(void *p)
|
22
|
+
{
|
23
|
+
Explanation *expl = (Explanation *)p;
|
24
|
+
int i;
|
25
|
+
for (i = 0; i < expl->dcnt; i++) {
|
26
|
+
expl_destoy(expl->details[i]);
|
27
|
+
}
|
28
|
+
free(expl->details);
|
29
|
+
free(expl->description);
|
30
|
+
free(expl);
|
31
|
+
}
|
32
|
+
|
33
|
+
/*
 * Append +detail+ to the detail list of +self+, doubling the list's
 * capacity when it is full. Returns +self+ so calls can be chained.
 */
Explanation *expl_add_detail(Explanation *self, Explanation *detail)
{
    if (self->dcnt >= self->dcapa) {
        self->dcapa = self->dcapa * 2;
        REALLOC_N(self->details, Explanation *, self->dcapa);
    }

    self->details[self->dcnt++] = detail;
    return self;
}
|
43
|
+
|
44
|
+
/*
 * Render +self+ and all of its details as indented text, one
 * "<value> = <description>" line per node. Each nesting level adds two
 * spaces; +depth+ is the level of +self+. The result is a heap string the
 * caller must free.
 */
char *expl_to_s(Explanation *self, int depth)
{
    char value_str[32];
    char *out = ALLOC_N(char, depth * 2 + 1);
    char *line;
    int idx;

    /* indentation prefix */
    memset(out, ' ', sizeof(char) * depth * 2);
    out[depth*2] = 0;

    dbl_to_s(value_str, self->value);
    line = epstrdup("%s = %s\n",
                    strlen(value_str) + strlen(self->description),
                    value_str, self->description);
    out = estrcat(out, line);

    for (idx = 0; idx < self->dcnt; idx++) {
        char *child = expl_to_s(self->details[idx], depth + 1);
        out = estrcat(out, child);
    }

    return out;
}
|
62
|
+
|
63
|
+
/*
 * Render +self+ and all of its details as nested HTML lists: each node
 * becomes "<ul>\n<li>value = description</li>\n", followed by its details
 * and a closing "</ul>\n". The result is a heap string the caller must
 * free.
 */
char *expl_to_html(Explanation *self)
{
    int i;
    char dbuf[32];
    char *buffer;

    dbl_to_s(dbuf, self->value);
    buffer = epstrdup("<ul>\n<li>%s = %s</li>\n",
                      strlen(dbuf) + strlen(self->description),
                      dbuf, self->description);

    for (i = 0; i < self->dcnt; i++) {
        /* Take the string back from estrcat, as expl_to_s does. The result
         * used to be dropped here, leaving +buffer+ pointing at the old
         * block if estrcat had to move it. */
        buffer = estrcat(buffer, expl_to_html(self->details[i]));
    }

    /* make room for the 6-character closing tag and its terminator */
    REALLOC_N(buffer, char, strlen(buffer) + 10);
    return strcat(buffer, "</ul>\n");
}
|
80
|
+
|
81
|
+
/***************************************************************************
|
82
|
+
*
|
83
|
+
* Hit
|
84
|
+
*
|
85
|
+
***************************************************************************/
|
86
|
+
|
87
|
+
bool hit_less_than(void *hit1, void *hit2)
|
88
|
+
{
|
89
|
+
if (((Hit *)hit1)->score == ((Hit *)hit2)->score) {
|
90
|
+
return ((Hit *)hit1)->doc > ((Hit *)hit2)->doc;
|
91
|
+
} else {
|
92
|
+
return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
/*
 * Typed variant of hit_less_than: lower score is "less"; for equal scores
 * the higher document number is "less".
 *
 * NOTE(review): a plain `inline` without `static` or `extern` is treated
 * differently by gnu89 and C99 - confirm the build mode.
 */
inline bool hit_lt(Hit *hit1, Hit *hit2)
{
    return (hit1->score == hit2->score) ? (hit1->doc > hit2->doc)
                                        : (hit1->score < hit2->score);
}
|
104
|
+
|
105
|
+
/*
 * Sift the root of the 1-based Hit heap in +pq+ down to its place.
 *
 * At each step the smaller child (per hit_lt) is chosen; while that child
 * is smaller than the saved root it is moved up one level. The saved root
 * is written into the slot where the walk stops.
 */
void hit_pq_down(PriorityQueue *pq)
{
    Hit **heap = (Hit **)pq->heap;
    Hit *sinking = heap[1];     /* node being pushed down */
    int parent = 1;
    int child;

    for (;;) {
        child = parent << 1;
        /* prefer the right child when it exists and is the smaller one */
        if ((child + 1 <= pq->count) && hit_lt(heap[child + 1], heap[child])) {
            child = child + 1;
        }
        if ((child > pq->count) || !hit_lt(heap[child], sinking)) {
            break;
        }
        heap[parent] = heap[child];     /* shift the child up */
        parent = child;
    }

    heap[parent] = sinking;
}
|
126
|
+
|
127
|
+
/*
 * Remove and return the top Hit of +pq+, or NULL when the queue is empty.
 * The last element is moved to the root and sifted down with hit_pq_down.
 */
Hit *hit_pq_pop(PriorityQueue *pq)
{
    Hit *top;

    if (pq->count <= 0) {
        return NULL;
    }

    top = (Hit *)pq->heap[1];
    pq->heap[1] = pq->heap[pq->count];  /* last element becomes the root */
    pq->heap[pq->count] = NULL;
    pq->count--;
    hit_pq_down(pq);                    /* restore heap order */
    return top;
}
|
140
|
+
|
141
|
+
/*
 * Sift the last element of the 1-based Hit heap in +pq+ up to its place:
 * parents that the new element is smaller than (per hit_lt) are moved
 * down one level until the element fits.
 */
inline void hit_pq_up(PriorityQueue *pq)
{
    Hit **heap = (Hit **)pq->heap;
    int child = pq->count;
    Hit *rising = heap[child];
    int parent;

    for (parent = child >> 1;
         (parent > 0) && hit_lt(rising, heap[parent]);
         parent = parent >> 1) {
        heap[child] = heap[parent];
        child = parent;
    }

    heap[child] = rising;
}
|
156
|
+
|
157
|
+
|
158
|
+
/*
 * Add +elem+ to the Hit heap in +pq+ and sift it up. No capacity check is
 * made here; the caller must make sure there is room.
 */
void hit_pq_push(PriorityQueue *pq, void *elem)
{
    pq->heap[++pq->count] = elem;
    hit_pq_up(pq);
}
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
/***************************************************************************
|
168
|
+
*
|
169
|
+
* TopDocs
|
170
|
+
*
|
171
|
+
***************************************************************************/
|
172
|
+
|
173
|
+
/*
 * Wrap a result page in a TopDocs. +hits+ is an array of +size+ Hit
 * pointers that td_destroy later frees; +total_hits+ is stored unchanged.
 */
TopDocs *td_create(int total_hits, int size, Hit **hits)
{
    TopDocs *top_docs = ALLOC(TopDocs);

    top_docs->hits = hits;
    top_docs->size = size;
    top_docs->total_hits = total_hits;
    return top_docs;
}
|
181
|
+
|
182
|
+
void td_destroy(void *p)
|
183
|
+
{
|
184
|
+
TopDocs *td = (TopDocs *)p;
|
185
|
+
int i;
|
186
|
+
for (i = 0; i < td->size; i++) {
|
187
|
+
free(td->hits[i]);
|
188
|
+
}
|
189
|
+
free(td->hits);
|
190
|
+
free(td);
|
191
|
+
}
|
192
|
+
|
193
|
+
/*
 * Render +td+ as text: a header line with the total hit count, followed
 * by one "\t<doc>:<score>" line per hit. The result is a heap string the
 * caller must free.
 */
char *td_to_s(TopDocs *td)
{
    int i;
    char dbuf[32];
    Hit *hit;
    char *buffer = epstrdup("%d hits sorted by <score, doc_num>\n", 20, td->total_hits);

    for (i = 0; i < td->size; i++) {
        hit = td->hits[i];
        dbl_to_s(dbuf, hit->score);
        /* Take the string back from estrcat, as expl_to_s does. The result
         * used to be dropped here, leaving +buffer+ pointing at the old
         * block if estrcat had to move it. */
        buffer = estrcat(buffer, epstrdup("\t%d:%s\n", 52, hit->doc, dbuf));
    }
    return buffer;
}
|
206
|
+
|
207
|
+
/***************************************************************************
|
208
|
+
*
|
209
|
+
* Weight
|
210
|
+
*
|
211
|
+
***************************************************************************/
|
212
|
+
|
213
|
+
/* Accessor: the query stored in this weight. */
Query *w_get_query(Weight *self)
{
    Query *owner = self->query;
    return owner;
}
|
217
|
+
|
218
|
+
/* Accessor: the value stored in this weight (set by w_normalize). */
float w_get_value(Weight *self)
{
    float value = self->value;
    return value;
}
|
222
|
+
|
223
|
+
/*
 * Set the query weight to idf * query boost, store it in +self+ and return
 * its square.
 */
float w_sum_of_squared_weights(Weight *self)
{
    float boost = self->query->boost;

    self->qweight = self->idf * boost;
    return self->qweight * self->qweight;
}
|
228
|
+
|
229
|
+
/*
 * Apply the query normalization factor: record it in qnorm, scale qweight
 * by it, and set value to the scaled qweight times idf.
 */
void w_normalize(Weight *self, float normalization_factor)
{
    self->qnorm = normalization_factor;
    self->qweight = self->qweight * normalization_factor;
    self->value = self->qweight * self->idf;
}
|
235
|
+
|
236
|
+
/***************************************************************************
|
237
|
+
*
|
238
|
+
* Query
|
239
|
+
*
|
240
|
+
***************************************************************************/
|
241
|
+
|
242
|
+
/*
 * Default get_similarity for queries: ask the searcher. +self+ is not
 * consulted.
 */
Similarity *q_get_similarity(Query *self, Searcher *searcher)
{
    Similarity *sim = searcher->get_similarity(searcher);
    return sim;
}
|
246
|
+
|
247
|
+
/*
 * Default rewrite for queries: return the query unchanged. +ir+ is not
 * used.
 */
Query *q_rewrite(Query *self, IndexReader *ir)
{
    Query *unchanged = self;
    return unchanged;
}
|
251
|
+
|
252
|
+
/*
 * Build, normalize and cache the Weight for +self+ under +searcher+.
 *
 * A weight cached from an earlier call is destroyed first. The query is
 * rewritten through the searcher, the weight is created from the rewritten
 * query, and it is normalized with the query norm computed from its sum of
 * squared weights. The new weight is stored in self->weight and returned.
 */
Weight *q_weight(Query *self, Searcher *searcher)
{
    Query *rewritten;
    Weight *fresh;
    Similarity *sim;
    float sum_sq;
    float norm;

    if (self->weight) {
        self->weight->destroy(self->weight);
    }

    rewritten = searcher->rewrite(searcher, self);
    fresh = rewritten->create_weight(rewritten, searcher);
    sum_sq = fresh->sum_of_squared_weights(fresh);
    sim = rewritten->get_similarity(rewritten, searcher);
    norm = sim_query_norm(sim, sum_sq);

    fresh->normalize(fresh, norm);
    self->weight = fresh;
    return fresh;
}
|
266
|
+
|
267
|
+
/*
 * Base destructor for queries: destroy the cached rewritten query and the
 * cached weight if they are set, then free the query struct.
 */
void q_destroy(Query *self)
{
    Query *rewritten = self->rewritten;

    if (rewritten) {
        rewritten->destroy(rewritten);
        self->rewritten = NULL;
    }
    if (self->weight) {
        self->weight->destroy(self->weight);
    }
    free(self);
}
|
278
|
+
|
279
|
+
/*
 * Default extract_terms for queries: adds nothing to +terms+.
 */
void q_extract_terms(Query *self, Array *terms)
{
    /* intentionally empty */
}
|
283
|
+
|
284
|
+
Query *q_create()
|
285
|
+
{
|
286
|
+
Query *self = ALLOC(Query);
|
287
|
+
ZEROSET(self, Query, 1);
|
288
|
+
self->destroy_all = true;
|
289
|
+
self->boost = 1.0;
|
290
|
+
self->rewrite = &q_rewrite;
|
291
|
+
self->get_similarity = &q_get_similarity;
|
292
|
+
self->extract_terms = &q_extract_terms;
|
293
|
+
self->weight = NULL;
|
294
|
+
self->rewritten = NULL;
|
295
|
+
return self;
|
296
|
+
}
|
297
|
+
|
298
|
+
/***************************************************************************
|
299
|
+
*
|
300
|
+
* Scorer
|
301
|
+
*
|
302
|
+
***************************************************************************/
|
303
|
+
|
304
|
+
void scorer_destroy(void *p)
|
305
|
+
{
|
306
|
+
Scorer *scorer = (Scorer *)p;
|
307
|
+
free(scorer->data);
|
308
|
+
free(scorer);
|
309
|
+
}
|
310
|
+
|
311
|
+
/*
 * Allocate a Scorer that uses +similarity+, with no data and
 * scorer_destroy as its destructor. The remaining fields are left
 * unset for the caller to fill in.
 */
Scorer *scorer_create(Similarity *similarity)
{
    Scorer *scorer = ALLOC(Scorer);

    scorer->similarity = similarity;
    scorer->data = NULL;
    scorer->destroy = &scorer_destroy;
    return scorer;
}
|
319
|
+
|
320
|
+
bool scorer_less_than(void *p1, void *p2)
|
321
|
+
{
|
322
|
+
Scorer *s1 = (Scorer *)p1;
|
323
|
+
Scorer *s2 = (Scorer *)p2;
|
324
|
+
return s1->score(s1) < s2->score(s2);
|
325
|
+
}
|
326
|
+
|
327
|
+
bool scorer_doc_less_than(void *p1, void *p2)
|
328
|
+
{
|
329
|
+
return ((Scorer *)p1)->doc < ((Scorer *)p2)->doc;
|
330
|
+
}
|
331
|
+
|
332
|
+
int scorer_doc_cmp(const void *p1, const void *p2)
|
333
|
+
{
|
334
|
+
return (*(Scorer **)p1)->doc - (*(Scorer **)p2)->doc;
|
335
|
+
}
|
336
|
+
/***************************************************************************
|
337
|
+
*
|
338
|
+
* Searcher
|
339
|
+
*
|
340
|
+
***************************************************************************/
|
341
|
+
|
342
|
+
/* Document frequency of +term+, delegated to the searcher's index reader. */
int sea_doc_freq(Searcher *self, Term *term)
{
    IndexReader *reader = self->ir;
    return reader->doc_freq(reader, term);
}
|
346
|
+
|
347
|
+
/*
 * Document frequencies of the +tcnt+ terms in +terms+, in the same order.
 * Returns a newly allocated int array of +tcnt+ entries that the caller
 * must free.
 */
int *sea_doc_freqs(Searcher *self, Term **terms, int tcnt)
{
    int *counts = ALLOC_N(int, tcnt);
    int idx = 0;

    while (idx < tcnt) {
        counts[idx] = self->ir->doc_freq(self->ir, terms[idx]);
        idx++;
    }
    return counts;
}
|
356
|
+
|
357
|
+
/* Fetch document +doc_num+ through the searcher's index reader. */
Document *sea_get_doc(Searcher *self, int doc_num)
{
    IndexReader *reader = self->ir;
    return reader->get_doc(reader, doc_num);
}
|
361
|
+
|
362
|
+
/* The max_doc value reported by the searcher's index reader. */
int sea_max_doc(Searcher *self)
{
    IndexReader *reader = self->ir;
    return reader->max_doc(reader);
}
|
366
|
+
|
367
|
+
/* Build the weight for +query+ under this searcher (see q_weight). */
Weight *sea_create_weight(Searcher *self, Query *query)
{
    Weight *weight = q_weight(query, self);
    return weight;
}
|
371
|
+
|
372
|
+
/*
 * Run +query+ and return one page of results.
 *
 *   first_doc  index of the first hit wanted (must be >= 0)
 *   num_docs   maximum number of hits wanted (must be > 0)
 *   filter     optional; only docs set in its bit vector are counted
 *   sort       optional; selects the field-sorted hit queue instead of the
 *              score-ordered one
 *
 * Up to first_doc + num_docs hits are kept in a priority queue whose top
 * is the lowest-ranked retained hit. Once the scorer is exhausted, the
 * page is popped from the queue and stored from the last slot to the
 * first. The returned TopDocs carries the total number of matching docs
 * and the page; it is released with td_destroy.
 *
 * The arguments are validated before the filter's bit vector is obtained,
 * so nothing is held if validation fails.
 * NOTE(review): this only matters if eprintf unwinds rather than exits -
 * confirm.
 */
TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
                    int num_docs, Filter *filter, Sort *sort)
{
    int max_size;
    int i;
    Weight *weight;
    Scorer *scorer;
    Hit **score_docs = NULL;
    Hit *hit;
    int total_hits = 0;
    float min_score = 0.0, score;
    BitVector *bits;
    Hit *(*hq_pop)(PriorityQueue *pq);
    void (*hq_down)(PriorityQueue *pq);
    void (*hq_push)(PriorityQueue *pq, void *elem);
    void (*hq_destroy)(void *p);
    PriorityQueue *hq;

    if (num_docs <= 0)
        eprintf(ARG_ERROR, "num_docs must be > 0 to run a search");

    if (first_doc < 0)
        eprintf(ARG_ERROR, "first_doc must be >= 0 to run a search");

    max_size = first_doc + num_docs;
    bits = (filter ? filter->get_bv(filter, self->ir) : NULL);

    weight = q_weight(query, self);
    scorer = weight->scorer(weight, self->ir);
    if (!scorer) {
        /* no scorer for this query: report an empty result */
        if (bits) bv_destroy(bits);
        return td_create(0, 0, NULL);
    }

    if (sort) {
        hq = fshq_pq_create(max_size, sort, self->ir);
        hq_pop = &fshq_pq_pop;
        hq_down = &fshq_pq_down;
        hq_push = &fshq_pq_push;
        hq_destroy = &fshq_pq_destroy;
    } else {
        hq = pq_create(max_size, &hit_less_than);
        hq_pop = &hit_pq_pop;
        hq_down = &hit_pq_down;
        hq_push = &hit_pq_push;
        hq_destroy = &pq_destroy;
    }

    while (scorer->next(scorer)) {
        if (bits && !bv_get(bits, scorer->doc)) continue;
        total_hits++;
        score = scorer->score(scorer);
        if (hq->count < max_size) {
            /* queue not full yet: every hit is kept */
            hit = ALLOC(Hit);
            hit->doc = scorer->doc; hit->score = score;
            hq_push(hq, hit);
            min_score = ((Hit *)pq_top(hq))->score; /* maintain min_score */
        } else if (score > min_score) {
            /* reuse the top entry for the better hit and re-sift it */
            hit = pq_top(hq);
            hit->doc = scorer->doc; hit->score = score;
            hq_down(hq);
            min_score = ((Hit *)pq_top(hq))->score; /* maintain min_score */
        }
    }
    scorer->destroy(scorer);

    if (hq->count > first_doc) {
        if ((hq->count - first_doc) < num_docs) {
            num_docs = hq->count - first_doc;
        }
        score_docs = ALLOC_N(Hit *, num_docs);
        /* popped hits are stored from the last slot to the first */
        for (i = num_docs - 1; i >= 0; i--) {
            score_docs[i] = hq_pop(hq);
        }
    } else {
        num_docs = 0;
    }
    pq_clear(hq);
    hq_destroy(hq);

    if (bits) bv_destroy(bits);
    return td_create(total_hits, num_docs, score_docs);
}
|
455
|
+
|
456
|
+
void sea_search_each(Searcher *self, Query *query, Filter *filter,
|
457
|
+
void (*fn)(Searcher *self, int doc_num, void *arg), void *arg)
|
458
|
+
{
|
459
|
+
Weight *weight;
|
460
|
+
Scorer *scorer;
|
461
|
+
BitVector *bits = (filter ? filter->get_bv(filter, self->ir) : NULL);
|
462
|
+
|
463
|
+
weight = q_weight(query, self);
|
464
|
+
scorer = weight->scorer(weight, self->ir);
|
465
|
+
if (!scorer) {
|
466
|
+
if (bits) bv_destroy(bits);
|
467
|
+
return;
|
468
|
+
}
|
469
|
+
|
470
|
+
while (scorer->next(scorer)) {
|
471
|
+
if (bits && !bv_get(bits, scorer->doc)) continue;
|
472
|
+
fn(self, scorer->doc, arg);
|
473
|
+
}
|
474
|
+
scorer->destroy(scorer);
|
475
|
+
}
|
476
|
+
|
477
|
+
/*
 * Rewrite +original+ repeatedly against the searcher's index reader until
 * a rewrite returns the very query it was given, and return that query.
 */
Query *sea_rewrite(Searcher *self, Query *original)
{
    Query *current = original;
    Query *next;

    for (;;) {
        next = current->rewrite(current, self->ir);
        if (next == current) {
            break;      /* fixed point reached */
        }
        current = next;
    }
    return current;
}
|
487
|
+
|
488
|
+
/*
 * Explain how document +doc_num+ scores for +query+: build the query's
 * weight and delegate to its explain hook.
 */
Explanation *sea_explain(Searcher *self, Query *query, int doc_num)
{
    Weight *weight = q_weight(query, self);
    Explanation *expl = weight->explain(weight, self->ir, doc_num);

    return expl;
}
|
493
|
+
|
494
|
+
/* Accessor: the similarity stored in this searcher. */
Similarity *sea_get_similarity(Searcher *self)
{
    Similarity *sim = self->similarity;
    return sim;
}
|
498
|
+
|
499
|
+
/*
 * Close the searcher: close its index reader if it has one, then free the
 * searcher struct. The similarity is not released here.
 */
void sea_close(Searcher *self)
{
    IndexReader *reader = self->ir;

    if (reader) {
        ir_close(reader);
    }
    free(self);
}
|
505
|
+
|
506
|
+
/*
 * Create a Searcher over +ir+ with the default similarity and the sea_*
 * implementations in this file as its function table. sea_close later
 * closes +ir+.
 */
Searcher *sea_create(IndexReader *ir)
{
    Searcher *searcher = ALLOC(Searcher);

    searcher->ir = ir;
    searcher->similarity = sim_create_default();

    /* index statistics and document access */
    searcher->doc_freq = &sea_doc_freq;
    searcher->doc_freqs = &sea_doc_freqs;
    searcher->get_doc = &sea_get_doc;
    searcher->max_doc = &sea_max_doc;

    /* query execution */
    searcher->create_weight = &sea_create_weight;
    searcher->search = &sea_search;
    searcher->rewrite = &sea_rewrite;
    searcher->explain = &sea_explain;

    searcher->get_similarity = &sea_get_similarity;
    searcher->close = &sea_close;
    return searcher;
}
|
523
|
+
|
524
|
+
|