ferret 0.10.3 → 0.10.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -96,6 +96,7 @@ EXT_SRC.each do |fn|
96
96
  dest_fn = File.join("ext", File.basename(fn))
97
97
  file dest_fn => fn do |t|
98
98
  begin
99
+ raise "copy for release" if ENV["REL"]
99
100
  ln_s File.join("..", fn), dest_fn
100
101
  rescue Exception => e
101
102
  cp File.expand_path(fn), dest_fn
data/ext/q_multi_term.c CHANGED
@@ -290,7 +290,7 @@ static void multi_tsc_destroy(Scorer *self)
290
290
  tdew_destroy(tdew_a[i]);
291
291
  }
292
292
  free(tdew_a);
293
- pq_destroy(MTSc(self)->tdew_pq);
293
+ if (MTSc(self)->tdew_pq) pq_destroy(MTSc(self)->tdew_pq);
294
294
  scorer_destroy_i(self);
295
295
  }
296
296
 
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.3'
2
+ VERSION = '0.10.4'
3
3
  end
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.10.3
6
+ version: 0.10.4
7
7
  date: 2006-09-09 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
@@ -69,7 +69,6 @@ files:
69
69
  - ext/q_multi_term.c
70
70
  - ext/q_parser.c
71
71
  - ext/q_span.c
72
- - ext/ind.c
73
72
  - ext/term_vectors.c
74
73
  - ext/priorityqueue.h
75
74
  - ext/defines.h
@@ -90,7 +89,6 @@ files:
90
89
  - ext/search.h
91
90
  - ext/mem_pool.h
92
91
  - ext/array.h
93
- - ext/ind.h
94
92
  - ext/lang.h
95
93
  - ext/stem_UTF_8_norwegian.c
96
94
  - ext/stem_UTF_8_danish.c
data/ext/ind.c DELETED
@@ -1,418 +0,0 @@
1
- #include "ind.h"
2
- #include "array.h"
3
- #include <string.h>
4
-
5
-
6
- static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
7
-
8
- static const char *ID_STRING = "id";
9
-
10
- #define INDEX_CLOSE_READER(self) do {\
11
- if (self->sea) {\
12
- searcher_close(self->sea);\
13
- self->sea = NULL;\
14
- self->ir = NULL;\
15
- } else if (self->ir) {\
16
- ir_close(self->ir);\
17
- self->ir = NULL;\
18
- }\
19
- } while (0)
20
-
21
- #define AUTOFLUSH_IR if (self->auto_flush) ir_commit(self->ir);\
22
- else self->has_writes = true
23
-
24
- #define AUTOFLUSH_IW \
25
- if (self->auto_flush) {\
26
- iw_close(self->iw);\
27
- self->iw = NULL;\
28
- } else self->has_writes = true
29
-
30
- void index_auto_flush_ir(Index *self)
31
- {
32
- AUTOFLUSH_IR;
33
- }
34
-
35
- void index_auto_flush_iw(Index *self)
36
- {
37
- AUTOFLUSH_IW;
38
- }
39
-
40
- Index *index_new(Store *store, Analyzer *analyzer, HashSet *def_fields,
41
- bool create)
42
- {
43
- HashSet *all_fields = hs_new_str(&free);
44
- Index *self = ALLOC_AND_ZERO(Index);
45
- self->config = default_config;
46
- mutex_init(&self->mutex, NULL);
47
- self->has_writes = false;
48
- if (store) {
49
- self->store = store;
50
- REF(store);
51
- } else {
52
- self->store = open_ram_store();
53
- create = true;
54
- }
55
- if (analyzer) {
56
- self->analyzer = analyzer;
57
- REF(analyzer);
58
- } else {
59
- self->analyzer = mb_standard_analyzer_new(true);
60
- }
61
-
62
- if (create) {
63
- FieldInfos *fis = fis_new(STORE_YES, INDEX_YES,
64
- TERM_VECTOR_WITH_POSITIONS_OFFSETS);
65
- index_create(store, fis);
66
- fis_deref(fis);
67
- }
68
-
69
- /* options */
70
- self->key = NULL;
71
- self->id_field = estrdup(ID_STRING);
72
- self->def_field = estrdup(ID_STRING);
73
- self->auto_flush = false;
74
- self->check_latest = true;
75
-
76
- REF(self->analyzer);
77
- self->qp = qp_new(all_fields, def_fields, self->analyzer);
78
- /* Index is a convenience class so set qp convenience options */
79
- self->qp->allow_any_fields = true;
80
- self->qp->clean_str = true;
81
- self->qp->handle_parse_errors = true;
82
-
83
- return self;
84
- }
85
-
86
- void index_destroy(Index *self)
87
- {
88
- mutex_destroy(&self->mutex);
89
- INDEX_CLOSE_READER(self);
90
- if (self->iw) iw_close(self->iw);
91
- store_deref(self->store);
92
- a_deref(self->analyzer);
93
- if (self->qp) qp_destroy(self->qp);
94
- if (self->key) hs_destroy(self->key);
95
- free(self->id_field);
96
- free(self->def_field);
97
- free(self);
98
- }
99
-
100
- void index_flush(Index *self)
101
- {
102
- if (self->ir) {
103
- ir_commit(self->ir);
104
- } else if (self->iw) {
105
- iw_close(self->iw);
106
- self->iw = NULL;
107
- }
108
- self->has_writes = false;
109
- }
110
-
111
- __inline void ensure_writer_open(Index *self)
112
- {
113
- if (!self->iw) {
114
- INDEX_CLOSE_READER(self);
115
-
116
- /* make sure the analzyer isn't deleted by the IndexWriter */
117
- REF(self->analyzer);
118
- self->iw = iw_open(self->store, self->analyzer, false);
119
- self->iw->config.use_compound_file = self->config.use_compound_file;
120
- } else if (self->analyzer != self->iw->analyzer) {
121
- a_deref(self->iw->analyzer);
122
- REF(self->analyzer);
123
- self->iw->analyzer = self->analyzer; /* in case it has changed */
124
- }
125
- }
126
-
127
- __inline void ensure_reader_open(Index *self)
128
- {
129
- if (self->ir) {
130
- if (self->check_latest && !ir_is_latest(self->ir)) {
131
- INDEX_CLOSE_READER(self);
132
- self->ir = ir_open(self->store);
133
- }
134
- } else {
135
- if (self->iw) {
136
- iw_close(self->iw);
137
- self->iw = NULL;
138
- }
139
- self->ir = ir_open(self->store);
140
- }
141
- }
142
-
143
- __inline void ensure_searcher_open(Index *self)
144
- {
145
- ensure_reader_open(self);
146
- if (!self->sea) {
147
- self->sea = isea_new(self->ir);
148
- }
149
- }
150
-
151
- int index_size(Index *self)
152
- {
153
- int size;
154
- mutex_lock(&self->mutex);
155
- ensure_reader_open(self);
156
- size = self->ir->num_docs(self->ir);
157
- mutex_unlock(&self->mutex);
158
- return size;
159
- }
160
-
161
- void index_optimize(Index *self)
162
- {
163
- mutex_lock(&self->mutex);
164
- ensure_writer_open(self);
165
- iw_optimize(self->iw);
166
- AUTOFLUSH_IW;
167
- mutex_unlock(&self->mutex);
168
- }
169
-
170
- bool index_has_del(Index *self)
171
- {
172
- bool has_del;
173
- mutex_lock(&self->mutex);
174
- ensure_reader_open(self);
175
- has_del = self->ir->has_deletions(self->ir);
176
- mutex_unlock(&self->mutex);
177
- return has_del;
178
- }
179
-
180
- bool index_is_deleted(Index *self, int doc_num)
181
- {
182
- bool is_del;
183
- mutex_lock(&self->mutex);
184
- ensure_reader_open(self);
185
- is_del = self->ir->is_deleted(self->ir, doc_num);
186
- mutex_unlock(&self->mutex);
187
- return is_del;
188
- }
189
-
190
- static __inline void index_add_doc_i(Index *self, Document *doc)
191
- {
192
- /* If there is a key specified delete the document with the same key */
193
- if (self->key) {
194
- int i;
195
- char *field;
196
- DocField *df;
197
- if (self->key->size == 1) {
198
- ensure_writer_open(self);
199
- field = self->key->elems[0];
200
- df = doc_get_field(doc, field);
201
- if (df) {
202
- iw_delete_term(self->iw, field, df->data[0]);
203
- }
204
- } else {
205
- Query *q = bq_new(false);
206
- TopDocs *td;
207
- ensure_searcher_open(self);
208
- for (i = 0; i < self->key->size; i++) {
209
- field = self->key->elems[i];
210
- df = doc_get_field(doc, field);
211
- if (!df) continue;
212
- bq_add_query(q, tq_new(field, df->data[0]), BC_MUST);
213
- }
214
- td = searcher_search(self->sea, q, 0, 1, NULL, NULL, NULL);
215
- if (td->total_hits > 1) {
216
- td_destroy(td);
217
- RAISE(ARG_ERROR, NON_UNIQUE_KEY_ERROR_MSG);
218
- } else if (td->total_hits == 1) {
219
- ir_delete_doc(self->ir, td->hits[0]->doc);
220
- }
221
- q_deref(q);
222
- td_destroy(td);
223
- }
224
- }
225
- ensure_writer_open(self);
226
- iw_add_doc(self->iw, doc);
227
- AUTOFLUSH_IW;
228
- }
229
-
230
- void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
231
- {
232
- Analyzer *tmp_analyzer;
233
- mutex_lock(&self->mutex);
234
- if (analyzer != self->analyzer) {
235
- REF(analyzer);
236
- tmp_analyzer = self->analyzer;
237
- self->analyzer = analyzer;
238
- index_add_doc_i(self, doc);
239
- self->analyzer = tmp_analyzer;
240
- a_deref(analyzer);
241
- } else {
242
- index_add_doc_i(self, doc);
243
- }
244
- mutex_unlock(&self->mutex);
245
- }
246
-
247
- void index_add_doc(Index *self, Document *doc)
248
- {
249
- mutex_lock(&self->mutex);
250
- index_add_doc_i(self, doc);
251
- mutex_unlock(&self->mutex);
252
- }
253
-
254
- void index_add_string(Index *self, char *str, Analyzer *analyzer)
255
- {
256
- Document *doc = doc_new();
257
- doc_add_field(doc, df_add_data(df_new(self->def_field), estrdup(str)));
258
- if (analyzer) index_add_doc_a(self, doc, analyzer);
259
- else index_add_doc(self, doc);
260
- doc_destroy(doc);
261
- }
262
-
263
- void index_add_array(Index *self, char **fields, Analyzer *analyzer)
264
- {
265
- int i;
266
- Document *doc = doc_new();
267
- for (i = 0; i < ary_size(fields); i++) {
268
- doc_add_field(doc, df_add_data(df_new(self->def_field),
269
- estrdup(fields[i])));
270
- }
271
- if (analyzer) index_add_doc_a(self, doc, analyzer);
272
- else index_add_doc(self, doc);
273
- doc_destroy(doc);
274
- }
275
-
276
- Query *index_get_query(Index *self, char *qstr)
277
- {
278
- int i;
279
- FieldInfos *fis;
280
- ensure_searcher_open(self);
281
- fis = self->ir->fis;
282
- for (i = fis->size - 1; i >= 0; i--) {
283
- char *field = fis->fields[i]->name;
284
- hs_add(self->qp->all_fields, estrdup(field));
285
- }
286
- return qp_parse(self->qp, qstr);
287
- }
288
-
289
- TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
290
- int num_docs, Filter *filter, Sort *sort,
291
- filter_ft filter_func)
292
- {
293
- Query *query;
294
- TopDocs *td;
295
- query = index_get_query(self, qstr); /* will ensure_searcher is open */
296
- td = searcher_search(self->sea, query, first_doc, num_docs,
297
- filter, sort, filter_func);
298
- q_deref(query);
299
- return td;
300
- }
301
-
302
- Document *index_get_doc(Index *self, int doc_num)
303
- {
304
- Document *doc;
305
- ensure_reader_open(self);
306
- doc = self->ir->get_doc(self->ir, doc_num);
307
- return doc;
308
- }
309
-
310
- Document *index_get_doc_ts(Index *self, int doc_num)
311
- {
312
- Document *doc;
313
- mutex_lock(&self->mutex);
314
- doc = index_get_doc(self, doc_num);
315
- mutex_unlock(&self->mutex);
316
- return doc;
317
- }
318
-
319
- int index_term_id(Index *self, const char *field, const char *term)
320
- {
321
- TermDocEnum *tde;
322
- int doc_num = -1;
323
- ensure_reader_open(self);
324
- tde = ir_term_docs_for(self->ir, field, term);
325
- if (tde->next(tde)) {
326
- doc_num = tde->doc_num(tde);
327
- }
328
- tde->close(tde);
329
- return doc_num;
330
- }
331
-
332
- Document *index_get_doc_term(Index *self, const char *field,
333
- const char *term)
334
- {
335
- Document *doc = NULL;
336
- TermDocEnum *tde;
337
- mutex_lock(&self->mutex);
338
- ensure_reader_open(self);
339
- tde = ir_term_docs_for(self->ir, field, term);
340
- if (tde->next(tde)) {
341
- doc = index_get_doc(self, tde->doc_num(tde));
342
- }
343
- tde->close(tde);
344
- mutex_unlock(&self->mutex);
345
- return doc;
346
- }
347
-
348
- Document *index_get_doc_id(Index *self, const char *id)
349
- {
350
- return index_get_doc_term(self, self->id_field, id);
351
- }
352
-
353
- void index_delete(Index *self, int doc_num)
354
- {
355
- mutex_lock(&self->mutex);
356
- ensure_reader_open(self);
357
- ir_delete_doc(self->ir, doc_num);
358
- AUTOFLUSH_IR;
359
- mutex_unlock(&self->mutex);
360
- }
361
-
362
- void index_delete_term(Index *self, const char *field, const char *term)
363
- {
364
- TermDocEnum *tde;
365
- mutex_lock(&self->mutex);
366
- if (self->ir) {
367
- tde = ir_term_docs_for(self->ir, field, term);
368
- TRY
369
- while (tde->next(tde)) {
370
- ir_delete_doc(self->ir, tde->doc_num(tde));
371
- AUTOFLUSH_IR;
372
- }
373
- XFINALLY
374
- tde->close(tde);
375
- XENDTRY
376
- } else {
377
- ensure_writer_open(self);
378
- iw_delete_term(self->iw, field, term);
379
- }
380
- mutex_unlock(&self->mutex);
381
- }
382
-
383
- void index_delete_id(Index *self, const char *id)
384
- {
385
- index_delete_term(self, self->id_field, id);
386
- }
387
-
388
- static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
389
- {
390
- (void)score; (void)arg;
391
- ir_delete_doc(((IndexSearcher *)sea)->ir, doc_num);
392
- }
393
-
394
- void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff)
395
- {
396
- mutex_lock(&self->mutex);
397
- ensure_searcher_open(self);
398
- searcher_search_each(self->sea, q, f, ff, &index_qdel_i, NULL);
399
- AUTOFLUSH_IR;
400
- mutex_unlock(&self->mutex);
401
- }
402
-
403
- void index_delete_query_str(Index *self, char *qstr, Filter *f, filter_ft ff)
404
- {
405
- Query *q = index_get_query(self, qstr);
406
- index_delete_query(self, q, f, ff);
407
- q_deref(q);
408
- }
409
-
410
- Explanation *index_explain(Index *self, Query *q, int doc_num)
411
- {
412
- Explanation *expl;
413
- mutex_lock(&self->mutex);
414
- ensure_searcher_open(self);
415
- expl = searcher_explain(self->sea, q, doc_num);
416
- mutex_unlock(&self->mutex);
417
- return expl;
418
- }
data/ext/ind.h DELETED
@@ -1,68 +0,0 @@
1
- #ifndef FRT_IND_H
2
- #define FRT_IND_H
3
-
4
- #include "search.h"
5
- #include "index.h"
6
-
7
- /***************************************************************************
8
- *
9
- * Index
10
- *
11
- ***************************************************************************/
12
-
13
- typedef struct Index
14
- {
15
- Config config;
16
- mutex_t mutex;
17
- Store *store;
18
- Analyzer *analyzer;
19
- IndexReader *ir;
20
- IndexWriter *iw;
21
- Searcher *sea;
22
- QParser *qp;
23
- HashSet *key;
24
- char *id_field;
25
- char *def_field;
26
- /* for IndexWriter */
27
- bool auto_flush : 1;
28
- bool has_writes : 1;
29
- bool check_latest : 1;
30
- } Index;
31
-
32
- extern Index *index_new(Store *store, Analyzer *analyzer,
33
- HashSet *def_fields, bool create);
34
- extern void index_destroy(Index *self);
35
- extern void index_flush(Index *self);
36
- extern int index_size(Index *self);
37
- extern void index_optimize(Index *self);
38
- extern bool index_has_del(Index *self);
39
- extern bool index_is_deleted(Index *self, int doc_num);
40
- extern void index_add_doc(Index *self, Document *doc);
41
- extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
42
- extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
43
- extern void index_add_array(Index *self, char **ary, Analyzer *analyzer);
44
- extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
45
- int num_docs, Filter *filter,
46
- Sort *sort, filter_ft filter_func);
47
- extern Query *index_get_query(Index *self, char *qstr);
48
- extern Document *index_get_doc(Index *self, int doc_num);
49
- extern Document *index_get_doc_ts(Index *self, int doc_num);
50
- extern Document *index_get_doc_id(Index *self, const char *id);
51
- extern Document *index_get_doc_term(Index *self, const char *field,
52
- const char *term);
53
- extern void index_delete(Index *self, int doc_num);
54
- extern void index_delete_term(Index *self, const char *field, const char *term);
55
- extern void index_delete_id(Index *self, const char *id);
56
- extern void index_delete_query(Index *self, Query *q, Filter *f, filter_ft ff);
57
- extern void index_delete_query_str(Index *self, char *qstr,
58
- Filter *f, filter_ft ff);
59
- extern int index_term_id(Index *self, const char *field, const char *term);
60
- extern Explanation *index_explain(Index *self, Query *q, int doc_num);
61
- extern void index_auto_flush_ir(Index *self);
62
- extern void index_auto_flush_iw(Index *self);
63
-
64
- extern __inline void ensure_searcher_open(Index *self);
65
- extern __inline void ensure_reader_open(Index *self);
66
- extern __inline void ensure_writer_open(Index *self);
67
-
68
- #endif