ferret 0.10.3 → 0.10.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/ext/q_multi_term.c +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +1 -3
- data/ext/ind.c +0 -418
- data/ext/ind.h +0 -68
data/Rakefile
CHANGED
data/ext/q_multi_term.c
CHANGED
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.3
|
6
|
+
version: 0.10.4
|
7
7
|
date: 2006-09-09 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
@@ -69,7 +69,6 @@ files:
|
|
69
69
|
- ext/q_multi_term.c
|
70
70
|
- ext/q_parser.c
|
71
71
|
- ext/q_span.c
|
72
|
-
- ext/ind.c
|
73
72
|
- ext/term_vectors.c
|
74
73
|
- ext/priorityqueue.h
|
75
74
|
- ext/defines.h
|
@@ -90,7 +89,6 @@ files:
|
|
90
89
|
- ext/search.h
|
91
90
|
- ext/mem_pool.h
|
92
91
|
- ext/array.h
|
93
|
-
- ext/ind.h
|
94
92
|
- ext/lang.h
|
95
93
|
- ext/stem_UTF_8_norwegian.c
|
96
94
|
- ext/stem_UTF_8_danish.c
|
data/ext/ind.c
DELETED
@@ -1,418 +0,0 @@
|
|
1
|
-
#include "ind.h"
|
2
|
-
#include "array.h"
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
|
6
|
-
static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
|
7
|
-
|
8
|
-
static const char *ID_STRING = "id";
|
9
|
-
|
10
|
-
#define INDEX_CLOSE_READER(self) do {\
|
11
|
-
if (self->sea) {\
|
12
|
-
searcher_close(self->sea);\
|
13
|
-
self->sea = NULL;\
|
14
|
-
self->ir = NULL;\
|
15
|
-
} else if (self->ir) {\
|
16
|
-
ir_close(self->ir);\
|
17
|
-
self->ir = NULL;\
|
18
|
-
}\
|
19
|
-
} while (0)
|
20
|
-
|
21
|
-
#define AUTOFLUSH_IR if (self->auto_flush) ir_commit(self->ir);\
|
22
|
-
else self->has_writes = true
|
23
|
-
|
24
|
-
#define AUTOFLUSH_IW \
|
25
|
-
if (self->auto_flush) {\
|
26
|
-
iw_close(self->iw);\
|
27
|
-
self->iw = NULL;\
|
28
|
-
} else self->has_writes = true
|
29
|
-
|
30
|
-
void index_auto_flush_ir(Index *self)
|
31
|
-
{
|
32
|
-
AUTOFLUSH_IR;
|
33
|
-
}
|
34
|
-
|
35
|
-
void index_auto_flush_iw(Index *self)
|
36
|
-
{
|
37
|
-
AUTOFLUSH_IW;
|
38
|
-
}
|
39
|
-
|
40
|
-
Index *index_new(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
41
|
-
bool create)
|
42
|
-
{
|
43
|
-
HashSet *all_fields = hs_new_str(&free);
|
44
|
-
Index *self = ALLOC_AND_ZERO(Index);
|
45
|
-
self->config = default_config;
|
46
|
-
mutex_init(&self->mutex, NULL);
|
47
|
-
self->has_writes = false;
|
48
|
-
if (store) {
|
49
|
-
self->store = store;
|
50
|
-
REF(store);
|
51
|
-
} else {
|
52
|
-
self->store = open_ram_store();
|
53
|
-
create = true;
|
54
|
-
}
|
55
|
-
if (analyzer) {
|
56
|
-
self->analyzer = analyzer;
|
57
|
-
REF(analyzer);
|
58
|
-
} else {
|
59
|
-
self->analyzer = mb_standard_analyzer_new(true);
|
60
|
-
}
|
61
|
-
|
62
|
-
if (create) {
|
63
|
-
FieldInfos *fis = fis_new(STORE_YES, INDEX_YES,
|
64
|
-
TERM_VECTOR_WITH_POSITIONS_OFFSETS);
|
65
|
-
index_create(store, fis);
|
66
|
-
fis_deref(fis);
|
67
|
-
}
|
68
|
-
|
69
|
-
/* options */
|
70
|
-
self->key = NULL;
|
71
|
-
self->id_field = estrdup(ID_STRING);
|
72
|
-
self->def_field = estrdup(ID_STRING);
|
73
|
-
self->auto_flush = false;
|
74
|
-
self->check_latest = true;
|
75
|
-
|
76
|
-
REF(self->analyzer);
|
77
|
-
self->qp = qp_new(all_fields, def_fields, self->analyzer);
|
78
|
-
/* Index is a convenience class so set qp convenience options */
|
79
|
-
self->qp->allow_any_fields = true;
|
80
|
-
self->qp->clean_str = true;
|
81
|
-
self->qp->handle_parse_errors = true;
|
82
|
-
|
83
|
-
return self;
|
84
|
-
}
|
85
|
-
|
86
|
-
void index_destroy(Index *self)
|
87
|
-
{
|
88
|
-
mutex_destroy(&self->mutex);
|
89
|
-
INDEX_CLOSE_READER(self);
|
90
|
-
if (self->iw) iw_close(self->iw);
|
91
|
-
store_deref(self->store);
|
92
|
-
a_deref(self->analyzer);
|
93
|
-
if (self->qp) qp_destroy(self->qp);
|
94
|
-
if (self->key) hs_destroy(self->key);
|
95
|
-
free(self->id_field);
|
96
|
-
free(self->def_field);
|
97
|
-
free(self);
|
98
|
-
}
|
99
|
-
|
100
|
-
void index_flush(Index *self)
|
101
|
-
{
|
102
|
-
if (self->ir) {
|
103
|
-
ir_commit(self->ir);
|
104
|
-
} else if (self->iw) {
|
105
|
-
iw_close(self->iw);
|
106
|
-
self->iw = NULL;
|
107
|
-
}
|
108
|
-
self->has_writes = false;
|
109
|
-
}
|
110
|
-
|
111
|
-
__inline void ensure_writer_open(Index *self)
|
112
|
-
{
|
113
|
-
if (!self->iw) {
|
114
|
-
INDEX_CLOSE_READER(self);
|
115
|
-
|
116
|
-
/* make sure the analzyer isn't deleted by the IndexWriter */
|
117
|
-
REF(self->analyzer);
|
118
|
-
self->iw = iw_open(self->store, self->analyzer, false);
|
119
|
-
self->iw->config.use_compound_file = self->config.use_compound_file;
|
120
|
-
} else if (self->analyzer != self->iw->analyzer) {
|
121
|
-
a_deref(self->iw->analyzer);
|
122
|
-
REF(self->analyzer);
|
123
|
-
self->iw->analyzer = self->analyzer; /* in case it has changed */
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
__inline void ensure_reader_open(Index *self)
|
128
|
-
{
|
129
|
-
if (self->ir) {
|
130
|
-
if (self->check_latest && !ir_is_latest(self->ir)) {
|
131
|
-
INDEX_CLOSE_READER(self);
|
132
|
-
self->ir = ir_open(self->store);
|
133
|
-
}
|
134
|
-
} else {
|
135
|
-
if (self->iw) {
|
136
|
-
iw_close(self->iw);
|
137
|
-
self->iw = NULL;
|
138
|
-
}
|
139
|
-
self->ir = ir_open(self->store);
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
__inline void ensure_searcher_open(Index *self)
|
144
|
-
{
|
145
|
-
ensure_reader_open(self);
|
146
|
-
if (!self->sea) {
|
147
|
-
self->sea = isea_new(self->ir);
|
148
|
-
}
|
149
|
-
}
|
150
|
-
|
151
|
-
int index_size(Index *self)
|
152
|
-
{
|
153
|
-
int size;
|
154
|
-
mutex_lock(&self->mutex);
|
155
|
-
ensure_reader_open(self);
|
156
|
-
size = self->ir->num_docs(self->ir);
|
157
|
-
mutex_unlock(&self->mutex);
|
158
|
-
return size;
|
159
|
-
}
|
160
|
-
|
161
|
-
void index_optimize(Index *self)
|
162
|
-
{
|
163
|
-
mutex_lock(&self->mutex);
|
164
|
-
ensure_writer_open(self);
|
165
|
-
iw_optimize(self->iw);
|
166
|
-
AUTOFLUSH_IW;
|
167
|
-
mutex_unlock(&self->mutex);
|
168
|
-
}
|
169
|
-
|
170
|
-
bool index_has_del(Index *self)
|
171
|
-
{
|
172
|
-
bool has_del;
|
173
|
-
mutex_lock(&self->mutex);
|
174
|
-
ensure_reader_open(self);
|
175
|
-
has_del = self->ir->has_deletions(self->ir);
|
176
|
-
mutex_unlock(&self->mutex);
|
177
|
-
return has_del;
|
178
|
-
}
|
179
|
-
|
180
|
-
bool index_is_deleted(Index *self, int doc_num)
|
181
|
-
{
|
182
|
-
bool is_del;
|
183
|
-
mutex_lock(&self->mutex);
|
184
|
-
ensure_reader_open(self);
|
185
|
-
is_del = self->ir->is_deleted(self->ir, doc_num);
|
186
|
-
mutex_unlock(&self->mutex);
|
187
|
-
return is_del;
|
188
|
-
}
|
189
|
-
|
190
|
-
static __inline void index_add_doc_i(Index *self, Document *doc)
|
191
|
-
{
|
192
|
-
/* If there is a key specified delete the document with the same key */
|
193
|
-
if (self->key) {
|
194
|
-
int i;
|
195
|
-
char *field;
|
196
|
-
DocField *df;
|
197
|
-
if (self->key->size == 1) {
|
198
|
-
ensure_writer_open(self);
|
199
|
-
field = self->key->elems[0];
|
200
|
-
df = doc_get_field(doc, field);
|
201
|
-
if (df) {
|
202
|
-
iw_delete_term(self->iw, field, df->data[0]);
|
203
|
-
}
|
204
|
-
} else {
|
205
|
-
Query *q = bq_new(false);
|
206
|
-
TopDocs *td;
|
207
|
-
ensure_searcher_open(self);
|
208
|
-
for (i = 0; i < self->key->size; i++) {
|
209
|
-
field = self->key->elems[i];
|
210
|
-
df = doc_get_field(doc, field);
|
211
|
-
if (!df) continue;
|
212
|
-
bq_add_query(q, tq_new(field, df->data[0]), BC_MUST);
|
213
|
-
}
|
214
|
-
td = searcher_search(self->sea, q, 0, 1, NULL, NULL, NULL);
|
215
|
-
if (td->total_hits > 1) {
|
216
|
-
td_destroy(td);
|
217
|
-
RAISE(ARG_ERROR, NON_UNIQUE_KEY_ERROR_MSG);
|
218
|
-
} else if (td->total_hits == 1) {
|
219
|
-
ir_delete_doc(self->ir, td->hits[0]->doc);
|
220
|
-
}
|
221
|
-
q_deref(q);
|
222
|
-
td_destroy(td);
|
223
|
-
}
|
224
|
-
}
|
225
|
-
ensure_writer_open(self);
|
226
|
-
iw_add_doc(self->iw, doc);
|
227
|
-
AUTOFLUSH_IW;
|
228
|
-
}
|
229
|
-
|
230
|
-
void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
|
231
|
-
{
|
232
|
-
Analyzer *tmp_analyzer;
|
233
|
-
mutex_lock(&self->mutex);
|
234
|
-
if (analyzer != self->analyzer) {
|
235
|
-
REF(analyzer);
|
236
|
-
tmp_analyzer = self->analyzer;
|
237
|
-
self->analyzer = analyzer;
|
238
|
-
index_add_doc_i(self, doc);
|
239
|
-
self->analyzer = tmp_analyzer;
|
240
|
-
a_deref(analyzer);
|
241
|
-
} else {
|
242
|
-
index_add_doc_i(self, doc);
|
243
|
-
}
|
244
|
-
mutex_unlock(&self->mutex);
|
245
|
-
}
|
246
|
-
|
247
|
-
void index_add_doc(Index *self, Document *doc)
|
248
|
-
{
|
249
|
-
mutex_lock(&self->mutex);
|
250
|
-
index_add_doc_i(self, doc);
|
251
|
-
mutex_unlock(&self->mutex);
|
252
|
-
}
|
253
|
-
|
254
|
-
void index_add_string(Index *self, char *str, Analyzer *analyzer)
|
255
|
-
{
|
256
|
-
Document *doc = doc_new();
|
257
|
-
doc_add_field(doc, df_add_data(df_new(self->def_field), estrdup(str)));
|
258
|
-
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
259
|
-
else index_add_doc(self, doc);
|
260
|
-
doc_destroy(doc);
|
261
|
-
}
|
262
|
-
|
263
|
-
void index_add_array(Index *self, char **fields, Analyzer *analyzer)
|
264
|
-
{
|
265
|
-
int i;
|
266
|
-
Document *doc = doc_new();
|
267
|
-
for (i = 0; i < ary_size(fields); i++) {
|
268
|
-
doc_add_field(doc, df_add_data(df_new(self->def_field),
|
269
|
-
estrdup(fields[i])));
|
270
|
-
}
|
271
|
-
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
272
|
-
else index_add_doc(self, doc);
|
273
|
-
doc_destroy(doc);
|
274
|
-
}
|
275
|
-
|
276
|
-
Query *index_get_query(Index *self, char *qstr)
|
277
|
-
{
|
278
|
-
int i;
|
279
|
-
FieldInfos *fis;
|
280
|
-
ensure_searcher_open(self);
|
281
|
-
fis = self->ir->fis;
|
282
|
-
for (i = fis->size - 1; i >= 0; i--) {
|
283
|
-
char *field = fis->fields[i]->name;
|
284
|
-
hs_add(self->qp->all_fields, estrdup(field));
|
285
|
-
}
|
286
|
-
return qp_parse(self->qp, qstr);
|
287
|
-
}
|
288
|
-
|
289
|
-
TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
|
290
|
-
int num_docs, Filter *filter, Sort *sort,
|
291
|
-
filter_ft filter_func)
|
292
|
-
{
|
293
|
-
Query *query;
|
294
|
-
TopDocs *td;
|
295
|
-
query = index_get_query(self, qstr); /* will ensure_searcher is open */
|
296
|
-
td = searcher_search(self->sea, query, first_doc, num_docs,
|
297
|
-
filter, sort, filter_func);
|
298
|
-
q_deref(query);
|
299
|
-
return td;
|
300
|
-
}
|
301
|
-
|
302
|
-
Document *index_get_doc(Index *self, int doc_num)
|
303
|
-
{
|
304
|
-
Document *doc;
|
305
|
-
ensure_reader_open(self);
|
306
|
-
doc = self->ir->get_doc(self->ir, doc_num);
|
307
|
-
return doc;
|
308
|
-
}
|
309
|
-
|
310
|
-
Document *index_get_doc_ts(Index *self, int doc_num)
|
311
|
-
{
|
312
|
-
Document *doc;
|
313
|
-
mutex_lock(&self->mutex);
|
314
|
-
doc = index_get_doc(self, doc_num);
|
315
|
-
mutex_unlock(&self->mutex);
|
316
|
-
return doc;
|
317
|
-
}
|
318
|
-
|
319
|
-
int index_term_id(Index *self, const char *field, const char *term)
|
320
|
-
{
|
321
|
-
TermDocEnum *tde;
|
322
|
-
int doc_num = -1;
|
323
|
-
ensure_reader_open(self);
|
324
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
325
|
-
if (tde->next(tde)) {
|
326
|
-
doc_num = tde->doc_num(tde);
|
327
|
-
}
|
328
|
-
tde->close(tde);
|
329
|
-
return doc_num;
|
330
|
-
}
|
331
|
-
|
332
|
-
Document *index_get_doc_term(Index *self, const char *field,
|
333
|
-
const char *term)
|
334
|
-
{
|
335
|
-
Document *doc = NULL;
|
336
|
-
TermDocEnum *tde;
|
337
|
-
mutex_lock(&self->mutex);
|
338
|
-
ensure_reader_open(self);
|
339
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
340
|
-
if (tde->next(tde)) {
|
341
|
-
doc = index_get_doc(self, tde->doc_num(tde));
|
342
|
-
}
|
343
|
-
tde->close(tde);
|
344
|
-
mutex_unlock(&self->mutex);
|
345
|
-
return doc;
|
346
|
-
}
|
347
|
-
|
348
|
-
Document *index_get_doc_id(Index *self, const char *id)
|
349
|
-
{
|
350
|
-
return index_get_doc_term(self, self->id_field, id);
|
351
|
-
}
|
352
|
-
|
353
|
-
void index_delete(Index *self, int doc_num)
|
354
|
-
{
|
355
|
-
mutex_lock(&self->mutex);
|
356
|
-
ensure_reader_open(self);
|
357
|
-
ir_delete_doc(self->ir, doc_num);
|
358
|
-
AUTOFLUSH_IR;
|
359
|
-
mutex_unlock(&self->mutex);
|
360
|
-
}
|
361
|
-
|
362
|
-
void index_delete_term(Index *self, const char *field, const char *term)
|
363
|
-
{
|
364
|
-
TermDocEnum *tde;
|
365
|
-
mutex_lock(&self->mutex);
|
366
|
-
if (self->ir) {
|
367
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
368
|
-
TRY
|
369
|
-
while (tde->next(tde)) {
|
370
|
-
ir_delete_doc(self->ir, tde->doc_num(tde));
|
371
|
-
AUTOFLUSH_IR;
|
372
|
-
}
|
373
|
-
XFINALLY
|
374
|
-
tde->close(tde);
|
375
|
-
XENDTRY
|
376
|
-
} else {
|
377
|
-
ensure_writer_open(self);
|
378
|
-
iw_delete_term(self->iw, field, term);
|
379
|
-
}
|
380
|
-
mutex_unlock(&self->mutex);
|
381
|
-
}
|
382
|
-
|
383
|
-
void index_delete_id(Index *self, const char *id)
|
384
|
-
{
|
385
|
-
index_delete_term(self, self->id_field, id);
|
386
|
-
}
|
387
|
-
|
388
|
-
static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
|
389
|
-
{
|
390
|
-
(void)score; (void)arg;
|
391
|
-
ir_delete_doc(((IndexSearcher *)sea)->ir, doc_num);
|
392
|
-
}
|
393
|
-
|
394
|
-
void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff)
|
395
|
-
{
|
396
|
-
mutex_lock(&self->mutex);
|
397
|
-
ensure_searcher_open(self);
|
398
|
-
searcher_search_each(self->sea, q, f, ff, &index_qdel_i, NULL);
|
399
|
-
AUTOFLUSH_IR;
|
400
|
-
mutex_unlock(&self->mutex);
|
401
|
-
}
|
402
|
-
|
403
|
-
void index_delete_query_str(Index *self, char *qstr, Filter *f, filter_ft ff)
|
404
|
-
{
|
405
|
-
Query *q = index_get_query(self, qstr);
|
406
|
-
index_delete_query(self, q, f, ff);
|
407
|
-
q_deref(q);
|
408
|
-
}
|
409
|
-
|
410
|
-
Explanation *index_explain(Index *self, Query *q, int doc_num)
|
411
|
-
{
|
412
|
-
Explanation *expl;
|
413
|
-
mutex_lock(&self->mutex);
|
414
|
-
ensure_searcher_open(self);
|
415
|
-
expl = searcher_explain(self->sea, q, doc_num);
|
416
|
-
mutex_unlock(&self->mutex);
|
417
|
-
return expl;
|
418
|
-
}
|
data/ext/ind.h
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
#ifndef FRT_IND_H
|
2
|
-
#define FRT_IND_H
|
3
|
-
|
4
|
-
#include "search.h"
|
5
|
-
#include "index.h"
|
6
|
-
|
7
|
-
/***************************************************************************
|
8
|
-
*
|
9
|
-
* Index
|
10
|
-
*
|
11
|
-
***************************************************************************/
|
12
|
-
|
13
|
-
typedef struct Index
|
14
|
-
{
|
15
|
-
Config config;
|
16
|
-
mutex_t mutex;
|
17
|
-
Store *store;
|
18
|
-
Analyzer *analyzer;
|
19
|
-
IndexReader *ir;
|
20
|
-
IndexWriter *iw;
|
21
|
-
Searcher *sea;
|
22
|
-
QParser *qp;
|
23
|
-
HashSet *key;
|
24
|
-
char *id_field;
|
25
|
-
char *def_field;
|
26
|
-
/* for IndexWriter */
|
27
|
-
bool auto_flush : 1;
|
28
|
-
bool has_writes : 1;
|
29
|
-
bool check_latest : 1;
|
30
|
-
} Index;
|
31
|
-
|
32
|
-
extern Index *index_new(Store *store, Analyzer *analyzer,
|
33
|
-
HashSet *def_fields, bool create);
|
34
|
-
extern void index_destroy(Index *self);
|
35
|
-
extern void index_flush(Index *self);
|
36
|
-
extern int index_size(Index *self);
|
37
|
-
extern void index_optimize(Index *self);
|
38
|
-
extern bool index_has_del(Index *self);
|
39
|
-
extern bool index_is_deleted(Index *self, int doc_num);
|
40
|
-
extern void index_add_doc(Index *self, Document *doc);
|
41
|
-
extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
42
|
-
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
43
|
-
extern void index_add_array(Index *self, char **ary, Analyzer *analyzer);
|
44
|
-
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
45
|
-
int num_docs, Filter *filter,
|
46
|
-
Sort *sort, filter_ft filter_func);
|
47
|
-
extern Query *index_get_query(Index *self, char *qstr);
|
48
|
-
extern Document *index_get_doc(Index *self, int doc_num);
|
49
|
-
extern Document *index_get_doc_ts(Index *self, int doc_num);
|
50
|
-
extern Document *index_get_doc_id(Index *self, const char *id);
|
51
|
-
extern Document *index_get_doc_term(Index *self, const char *field,
|
52
|
-
const char *term);
|
53
|
-
extern void index_delete(Index *self, int doc_num);
|
54
|
-
extern void index_delete_term(Index *self, const char *field, const char *term);
|
55
|
-
extern void index_delete_id(Index *self, const char *id);
|
56
|
-
extern void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff);
|
57
|
-
extern void index_delete_query_str(Index *self, char *qstr,
|
58
|
-
Filter *f, filter_ft ff);
|
59
|
-
extern int index_term_id(Index *self, const char *field, const char *term);
|
60
|
-
extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
61
|
-
extern void index_auto_flush_ir(Index *self);
|
62
|
-
extern void index_auto_flush_iw(Index *self);
|
63
|
-
|
64
|
-
extern __inline void ensure_searcher_open(Index *self);
|
65
|
-
extern __inline void ensure_reader_open(Index *self);
|
66
|
-
extern __inline void ensure_writer_open(Index *self);
|
67
|
-
|
68
|
-
#endif
|