ferret 0.10.3 → 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/ext/q_multi_term.c +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +1 -3
- data/ext/ind.c +0 -418
- data/ext/ind.h +0 -68
data/Rakefile
CHANGED
data/ext/q_multi_term.c
CHANGED
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.3
|
6
|
+
version: 0.10.4
|
7
7
|
date: 2006-09-09 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
@@ -69,7 +69,6 @@ files:
|
|
69
69
|
- ext/q_multi_term.c
|
70
70
|
- ext/q_parser.c
|
71
71
|
- ext/q_span.c
|
72
|
-
- ext/ind.c
|
73
72
|
- ext/term_vectors.c
|
74
73
|
- ext/priorityqueue.h
|
75
74
|
- ext/defines.h
|
@@ -90,7 +89,6 @@ files:
|
|
90
89
|
- ext/search.h
|
91
90
|
- ext/mem_pool.h
|
92
91
|
- ext/array.h
|
93
|
-
- ext/ind.h
|
94
92
|
- ext/lang.h
|
95
93
|
- ext/stem_UTF_8_norwegian.c
|
96
94
|
- ext/stem_UTF_8_danish.c
|
data/ext/ind.c
DELETED
@@ -1,418 +0,0 @@
|
|
1
|
-
#include "ind.h"
|
2
|
-
#include "array.h"
|
3
|
-
#include <string.h>
|
4
|
-
|
5
|
-
|
6
|
-
static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
|
7
|
-
|
8
|
-
static const char *ID_STRING = "id";
|
9
|
-
|
10
|
-
#define INDEX_CLOSE_READER(self) do {\
|
11
|
-
if (self->sea) {\
|
12
|
-
searcher_close(self->sea);\
|
13
|
-
self->sea = NULL;\
|
14
|
-
self->ir = NULL;\
|
15
|
-
} else if (self->ir) {\
|
16
|
-
ir_close(self->ir);\
|
17
|
-
self->ir = NULL;\
|
18
|
-
}\
|
19
|
-
} while (0)
|
20
|
-
|
21
|
-
#define AUTOFLUSH_IR if (self->auto_flush) ir_commit(self->ir);\
|
22
|
-
else self->has_writes = true
|
23
|
-
|
24
|
-
#define AUTOFLUSH_IW \
|
25
|
-
if (self->auto_flush) {\
|
26
|
-
iw_close(self->iw);\
|
27
|
-
self->iw = NULL;\
|
28
|
-
} else self->has_writes = true
|
29
|
-
|
30
|
-
void index_auto_flush_ir(Index *self)
|
31
|
-
{
|
32
|
-
AUTOFLUSH_IR;
|
33
|
-
}
|
34
|
-
|
35
|
-
void index_auto_flush_iw(Index *self)
|
36
|
-
{
|
37
|
-
AUTOFLUSH_IW;
|
38
|
-
}
|
39
|
-
|
40
|
-
Index *index_new(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
41
|
-
bool create)
|
42
|
-
{
|
43
|
-
HashSet *all_fields = hs_new_str(&free);
|
44
|
-
Index *self = ALLOC_AND_ZERO(Index);
|
45
|
-
self->config = default_config;
|
46
|
-
mutex_init(&self->mutex, NULL);
|
47
|
-
self->has_writes = false;
|
48
|
-
if (store) {
|
49
|
-
self->store = store;
|
50
|
-
REF(store);
|
51
|
-
} else {
|
52
|
-
self->store = open_ram_store();
|
53
|
-
create = true;
|
54
|
-
}
|
55
|
-
if (analyzer) {
|
56
|
-
self->analyzer = analyzer;
|
57
|
-
REF(analyzer);
|
58
|
-
} else {
|
59
|
-
self->analyzer = mb_standard_analyzer_new(true);
|
60
|
-
}
|
61
|
-
|
62
|
-
if (create) {
|
63
|
-
FieldInfos *fis = fis_new(STORE_YES, INDEX_YES,
|
64
|
-
TERM_VECTOR_WITH_POSITIONS_OFFSETS);
|
65
|
-
index_create(store, fis);
|
66
|
-
fis_deref(fis);
|
67
|
-
}
|
68
|
-
|
69
|
-
/* options */
|
70
|
-
self->key = NULL;
|
71
|
-
self->id_field = estrdup(ID_STRING);
|
72
|
-
self->def_field = estrdup(ID_STRING);
|
73
|
-
self->auto_flush = false;
|
74
|
-
self->check_latest = true;
|
75
|
-
|
76
|
-
REF(self->analyzer);
|
77
|
-
self->qp = qp_new(all_fields, def_fields, self->analyzer);
|
78
|
-
/* Index is a convenience class so set qp convenience options */
|
79
|
-
self->qp->allow_any_fields = true;
|
80
|
-
self->qp->clean_str = true;
|
81
|
-
self->qp->handle_parse_errors = true;
|
82
|
-
|
83
|
-
return self;
|
84
|
-
}
|
85
|
-
|
86
|
-
void index_destroy(Index *self)
|
87
|
-
{
|
88
|
-
mutex_destroy(&self->mutex);
|
89
|
-
INDEX_CLOSE_READER(self);
|
90
|
-
if (self->iw) iw_close(self->iw);
|
91
|
-
store_deref(self->store);
|
92
|
-
a_deref(self->analyzer);
|
93
|
-
if (self->qp) qp_destroy(self->qp);
|
94
|
-
if (self->key) hs_destroy(self->key);
|
95
|
-
free(self->id_field);
|
96
|
-
free(self->def_field);
|
97
|
-
free(self);
|
98
|
-
}
|
99
|
-
|
100
|
-
void index_flush(Index *self)
|
101
|
-
{
|
102
|
-
if (self->ir) {
|
103
|
-
ir_commit(self->ir);
|
104
|
-
} else if (self->iw) {
|
105
|
-
iw_close(self->iw);
|
106
|
-
self->iw = NULL;
|
107
|
-
}
|
108
|
-
self->has_writes = false;
|
109
|
-
}
|
110
|
-
|
111
|
-
__inline void ensure_writer_open(Index *self)
|
112
|
-
{
|
113
|
-
if (!self->iw) {
|
114
|
-
INDEX_CLOSE_READER(self);
|
115
|
-
|
116
|
-
/* make sure the analzyer isn't deleted by the IndexWriter */
|
117
|
-
REF(self->analyzer);
|
118
|
-
self->iw = iw_open(self->store, self->analyzer, false);
|
119
|
-
self->iw->config.use_compound_file = self->config.use_compound_file;
|
120
|
-
} else if (self->analyzer != self->iw->analyzer) {
|
121
|
-
a_deref(self->iw->analyzer);
|
122
|
-
REF(self->analyzer);
|
123
|
-
self->iw->analyzer = self->analyzer; /* in case it has changed */
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
__inline void ensure_reader_open(Index *self)
|
128
|
-
{
|
129
|
-
if (self->ir) {
|
130
|
-
if (self->check_latest && !ir_is_latest(self->ir)) {
|
131
|
-
INDEX_CLOSE_READER(self);
|
132
|
-
self->ir = ir_open(self->store);
|
133
|
-
}
|
134
|
-
} else {
|
135
|
-
if (self->iw) {
|
136
|
-
iw_close(self->iw);
|
137
|
-
self->iw = NULL;
|
138
|
-
}
|
139
|
-
self->ir = ir_open(self->store);
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
__inline void ensure_searcher_open(Index *self)
|
144
|
-
{
|
145
|
-
ensure_reader_open(self);
|
146
|
-
if (!self->sea) {
|
147
|
-
self->sea = isea_new(self->ir);
|
148
|
-
}
|
149
|
-
}
|
150
|
-
|
151
|
-
int index_size(Index *self)
|
152
|
-
{
|
153
|
-
int size;
|
154
|
-
mutex_lock(&self->mutex);
|
155
|
-
ensure_reader_open(self);
|
156
|
-
size = self->ir->num_docs(self->ir);
|
157
|
-
mutex_unlock(&self->mutex);
|
158
|
-
return size;
|
159
|
-
}
|
160
|
-
|
161
|
-
void index_optimize(Index *self)
|
162
|
-
{
|
163
|
-
mutex_lock(&self->mutex);
|
164
|
-
ensure_writer_open(self);
|
165
|
-
iw_optimize(self->iw);
|
166
|
-
AUTOFLUSH_IW;
|
167
|
-
mutex_unlock(&self->mutex);
|
168
|
-
}
|
169
|
-
|
170
|
-
bool index_has_del(Index *self)
|
171
|
-
{
|
172
|
-
bool has_del;
|
173
|
-
mutex_lock(&self->mutex);
|
174
|
-
ensure_reader_open(self);
|
175
|
-
has_del = self->ir->has_deletions(self->ir);
|
176
|
-
mutex_unlock(&self->mutex);
|
177
|
-
return has_del;
|
178
|
-
}
|
179
|
-
|
180
|
-
bool index_is_deleted(Index *self, int doc_num)
|
181
|
-
{
|
182
|
-
bool is_del;
|
183
|
-
mutex_lock(&self->mutex);
|
184
|
-
ensure_reader_open(self);
|
185
|
-
is_del = self->ir->is_deleted(self->ir, doc_num);
|
186
|
-
mutex_unlock(&self->mutex);
|
187
|
-
return is_del;
|
188
|
-
}
|
189
|
-
|
190
|
-
static __inline void index_add_doc_i(Index *self, Document *doc)
|
191
|
-
{
|
192
|
-
/* If there is a key specified delete the document with the same key */
|
193
|
-
if (self->key) {
|
194
|
-
int i;
|
195
|
-
char *field;
|
196
|
-
DocField *df;
|
197
|
-
if (self->key->size == 1) {
|
198
|
-
ensure_writer_open(self);
|
199
|
-
field = self->key->elems[0];
|
200
|
-
df = doc_get_field(doc, field);
|
201
|
-
if (df) {
|
202
|
-
iw_delete_term(self->iw, field, df->data[0]);
|
203
|
-
}
|
204
|
-
} else {
|
205
|
-
Query *q = bq_new(false);
|
206
|
-
TopDocs *td;
|
207
|
-
ensure_searcher_open(self);
|
208
|
-
for (i = 0; i < self->key->size; i++) {
|
209
|
-
field = self->key->elems[i];
|
210
|
-
df = doc_get_field(doc, field);
|
211
|
-
if (!df) continue;
|
212
|
-
bq_add_query(q, tq_new(field, df->data[0]), BC_MUST);
|
213
|
-
}
|
214
|
-
td = searcher_search(self->sea, q, 0, 1, NULL, NULL, NULL);
|
215
|
-
if (td->total_hits > 1) {
|
216
|
-
td_destroy(td);
|
217
|
-
RAISE(ARG_ERROR, NON_UNIQUE_KEY_ERROR_MSG);
|
218
|
-
} else if (td->total_hits == 1) {
|
219
|
-
ir_delete_doc(self->ir, td->hits[0]->doc);
|
220
|
-
}
|
221
|
-
q_deref(q);
|
222
|
-
td_destroy(td);
|
223
|
-
}
|
224
|
-
}
|
225
|
-
ensure_writer_open(self);
|
226
|
-
iw_add_doc(self->iw, doc);
|
227
|
-
AUTOFLUSH_IW;
|
228
|
-
}
|
229
|
-
|
230
|
-
void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
|
231
|
-
{
|
232
|
-
Analyzer *tmp_analyzer;
|
233
|
-
mutex_lock(&self->mutex);
|
234
|
-
if (analyzer != self->analyzer) {
|
235
|
-
REF(analyzer);
|
236
|
-
tmp_analyzer = self->analyzer;
|
237
|
-
self->analyzer = analyzer;
|
238
|
-
index_add_doc_i(self, doc);
|
239
|
-
self->analyzer = tmp_analyzer;
|
240
|
-
a_deref(analyzer);
|
241
|
-
} else {
|
242
|
-
index_add_doc_i(self, doc);
|
243
|
-
}
|
244
|
-
mutex_unlock(&self->mutex);
|
245
|
-
}
|
246
|
-
|
247
|
-
void index_add_doc(Index *self, Document *doc)
|
248
|
-
{
|
249
|
-
mutex_lock(&self->mutex);
|
250
|
-
index_add_doc_i(self, doc);
|
251
|
-
mutex_unlock(&self->mutex);
|
252
|
-
}
|
253
|
-
|
254
|
-
void index_add_string(Index *self, char *str, Analyzer *analyzer)
|
255
|
-
{
|
256
|
-
Document *doc = doc_new();
|
257
|
-
doc_add_field(doc, df_add_data(df_new(self->def_field), estrdup(str)));
|
258
|
-
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
259
|
-
else index_add_doc(self, doc);
|
260
|
-
doc_destroy(doc);
|
261
|
-
}
|
262
|
-
|
263
|
-
void index_add_array(Index *self, char **fields, Analyzer *analyzer)
|
264
|
-
{
|
265
|
-
int i;
|
266
|
-
Document *doc = doc_new();
|
267
|
-
for (i = 0; i < ary_size(fields); i++) {
|
268
|
-
doc_add_field(doc, df_add_data(df_new(self->def_field),
|
269
|
-
estrdup(fields[i])));
|
270
|
-
}
|
271
|
-
if (analyzer) index_add_doc_a(self, doc, analyzer);
|
272
|
-
else index_add_doc(self, doc);
|
273
|
-
doc_destroy(doc);
|
274
|
-
}
|
275
|
-
|
276
|
-
Query *index_get_query(Index *self, char *qstr)
|
277
|
-
{
|
278
|
-
int i;
|
279
|
-
FieldInfos *fis;
|
280
|
-
ensure_searcher_open(self);
|
281
|
-
fis = self->ir->fis;
|
282
|
-
for (i = fis->size - 1; i >= 0; i--) {
|
283
|
-
char *field = fis->fields[i]->name;
|
284
|
-
hs_add(self->qp->all_fields, estrdup(field));
|
285
|
-
}
|
286
|
-
return qp_parse(self->qp, qstr);
|
287
|
-
}
|
288
|
-
|
289
|
-
TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
|
290
|
-
int num_docs, Filter *filter, Sort *sort,
|
291
|
-
filter_ft filter_func)
|
292
|
-
{
|
293
|
-
Query *query;
|
294
|
-
TopDocs *td;
|
295
|
-
query = index_get_query(self, qstr); /* will ensure_searcher is open */
|
296
|
-
td = searcher_search(self->sea, query, first_doc, num_docs,
|
297
|
-
filter, sort, filter_func);
|
298
|
-
q_deref(query);
|
299
|
-
return td;
|
300
|
-
}
|
301
|
-
|
302
|
-
Document *index_get_doc(Index *self, int doc_num)
|
303
|
-
{
|
304
|
-
Document *doc;
|
305
|
-
ensure_reader_open(self);
|
306
|
-
doc = self->ir->get_doc(self->ir, doc_num);
|
307
|
-
return doc;
|
308
|
-
}
|
309
|
-
|
310
|
-
Document *index_get_doc_ts(Index *self, int doc_num)
|
311
|
-
{
|
312
|
-
Document *doc;
|
313
|
-
mutex_lock(&self->mutex);
|
314
|
-
doc = index_get_doc(self, doc_num);
|
315
|
-
mutex_unlock(&self->mutex);
|
316
|
-
return doc;
|
317
|
-
}
|
318
|
-
|
319
|
-
int index_term_id(Index *self, const char *field, const char *term)
|
320
|
-
{
|
321
|
-
TermDocEnum *tde;
|
322
|
-
int doc_num = -1;
|
323
|
-
ensure_reader_open(self);
|
324
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
325
|
-
if (tde->next(tde)) {
|
326
|
-
doc_num = tde->doc_num(tde);
|
327
|
-
}
|
328
|
-
tde->close(tde);
|
329
|
-
return doc_num;
|
330
|
-
}
|
331
|
-
|
332
|
-
Document *index_get_doc_term(Index *self, const char *field,
|
333
|
-
const char *term)
|
334
|
-
{
|
335
|
-
Document *doc = NULL;
|
336
|
-
TermDocEnum *tde;
|
337
|
-
mutex_lock(&self->mutex);
|
338
|
-
ensure_reader_open(self);
|
339
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
340
|
-
if (tde->next(tde)) {
|
341
|
-
doc = index_get_doc(self, tde->doc_num(tde));
|
342
|
-
}
|
343
|
-
tde->close(tde);
|
344
|
-
mutex_unlock(&self->mutex);
|
345
|
-
return doc;
|
346
|
-
}
|
347
|
-
|
348
|
-
Document *index_get_doc_id(Index *self, const char *id)
|
349
|
-
{
|
350
|
-
return index_get_doc_term(self, self->id_field, id);
|
351
|
-
}
|
352
|
-
|
353
|
-
void index_delete(Index *self, int doc_num)
|
354
|
-
{
|
355
|
-
mutex_lock(&self->mutex);
|
356
|
-
ensure_reader_open(self);
|
357
|
-
ir_delete_doc(self->ir, doc_num);
|
358
|
-
AUTOFLUSH_IR;
|
359
|
-
mutex_unlock(&self->mutex);
|
360
|
-
}
|
361
|
-
|
362
|
-
void index_delete_term(Index *self, const char *field, const char *term)
|
363
|
-
{
|
364
|
-
TermDocEnum *tde;
|
365
|
-
mutex_lock(&self->mutex);
|
366
|
-
if (self->ir) {
|
367
|
-
tde = ir_term_docs_for(self->ir, field, term);
|
368
|
-
TRY
|
369
|
-
while (tde->next(tde)) {
|
370
|
-
ir_delete_doc(self->ir, tde->doc_num(tde));
|
371
|
-
AUTOFLUSH_IR;
|
372
|
-
}
|
373
|
-
XFINALLY
|
374
|
-
tde->close(tde);
|
375
|
-
XENDTRY
|
376
|
-
} else {
|
377
|
-
ensure_writer_open(self);
|
378
|
-
iw_delete_term(self->iw, field, term);
|
379
|
-
}
|
380
|
-
mutex_unlock(&self->mutex);
|
381
|
-
}
|
382
|
-
|
383
|
-
void index_delete_id(Index *self, const char *id)
|
384
|
-
{
|
385
|
-
index_delete_term(self, self->id_field, id);
|
386
|
-
}
|
387
|
-
|
388
|
-
static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
|
389
|
-
{
|
390
|
-
(void)score; (void)arg;
|
391
|
-
ir_delete_doc(((IndexSearcher *)sea)->ir, doc_num);
|
392
|
-
}
|
393
|
-
|
394
|
-
void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff)
|
395
|
-
{
|
396
|
-
mutex_lock(&self->mutex);
|
397
|
-
ensure_searcher_open(self);
|
398
|
-
searcher_search_each(self->sea, q, f, ff, &index_qdel_i, NULL);
|
399
|
-
AUTOFLUSH_IR;
|
400
|
-
mutex_unlock(&self->mutex);
|
401
|
-
}
|
402
|
-
|
403
|
-
void index_delete_query_str(Index *self, char *qstr, Filter *f, filter_ft ff)
|
404
|
-
{
|
405
|
-
Query *q = index_get_query(self, qstr);
|
406
|
-
index_delete_query(self, q, f, ff);
|
407
|
-
q_deref(q);
|
408
|
-
}
|
409
|
-
|
410
|
-
Explanation *index_explain(Index *self, Query *q, int doc_num)
|
411
|
-
{
|
412
|
-
Explanation *expl;
|
413
|
-
mutex_lock(&self->mutex);
|
414
|
-
ensure_searcher_open(self);
|
415
|
-
expl = searcher_explain(self->sea, q, doc_num);
|
416
|
-
mutex_unlock(&self->mutex);
|
417
|
-
return expl;
|
418
|
-
}
|
data/ext/ind.h
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
#ifndef FRT_IND_H
|
2
|
-
#define FRT_IND_H
|
3
|
-
|
4
|
-
#include "search.h"
|
5
|
-
#include "index.h"
|
6
|
-
|
7
|
-
/***************************************************************************
|
8
|
-
*
|
9
|
-
* Index
|
10
|
-
*
|
11
|
-
***************************************************************************/
|
12
|
-
|
13
|
-
typedef struct Index
|
14
|
-
{
|
15
|
-
Config config;
|
16
|
-
mutex_t mutex;
|
17
|
-
Store *store;
|
18
|
-
Analyzer *analyzer;
|
19
|
-
IndexReader *ir;
|
20
|
-
IndexWriter *iw;
|
21
|
-
Searcher *sea;
|
22
|
-
QParser *qp;
|
23
|
-
HashSet *key;
|
24
|
-
char *id_field;
|
25
|
-
char *def_field;
|
26
|
-
/* for IndexWriter */
|
27
|
-
bool auto_flush : 1;
|
28
|
-
bool has_writes : 1;
|
29
|
-
bool check_latest : 1;
|
30
|
-
} Index;
|
31
|
-
|
32
|
-
extern Index *index_new(Store *store, Analyzer *analyzer,
|
33
|
-
HashSet *def_fields, bool create);
|
34
|
-
extern void index_destroy(Index *self);
|
35
|
-
extern void index_flush(Index *self);
|
36
|
-
extern int index_size(Index *self);
|
37
|
-
extern void index_optimize(Index *self);
|
38
|
-
extern bool index_has_del(Index *self);
|
39
|
-
extern bool index_is_deleted(Index *self, int doc_num);
|
40
|
-
extern void index_add_doc(Index *self, Document *doc);
|
41
|
-
extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
42
|
-
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
43
|
-
extern void index_add_array(Index *self, char **ary, Analyzer *analyzer);
|
44
|
-
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
45
|
-
int num_docs, Filter *filter,
|
46
|
-
Sort *sort, filter_ft filter_func);
|
47
|
-
extern Query *index_get_query(Index *self, char *qstr);
|
48
|
-
extern Document *index_get_doc(Index *self, int doc_num);
|
49
|
-
extern Document *index_get_doc_ts(Index *self, int doc_num);
|
50
|
-
extern Document *index_get_doc_id(Index *self, const char *id);
|
51
|
-
extern Document *index_get_doc_term(Index *self, const char *field,
|
52
|
-
const char *term);
|
53
|
-
extern void index_delete(Index *self, int doc_num);
|
54
|
-
extern void index_delete_term(Index *self, const char *field, const char *term);
|
55
|
-
extern void index_delete_id(Index *self, const char *id);
|
56
|
-
extern void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff);
|
57
|
-
extern void index_delete_query_str(Index *self, char *qstr,
|
58
|
-
Filter *f, filter_ft ff);
|
59
|
-
extern int index_term_id(Index *self, const char *field, const char *term);
|
60
|
-
extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
61
|
-
extern void index_auto_flush_ir(Index *self);
|
62
|
-
extern void index_auto_flush_iw(Index *self);
|
63
|
-
|
64
|
-
extern __inline void ensure_searcher_open(Index *self);
|
65
|
-
extern __inline void ensure_reader_open(Index *self);
|
66
|
-
extern __inline void ensure_writer_open(Index *self);
|
67
|
-
|
68
|
-
#endif
|