ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/search.h
CHANGED
@@ -9,6 +9,9 @@ typedef struct Scorer Scorer;
|
|
9
9
|
#include "bitvector.h"
|
10
10
|
#include "similarity.h"
|
11
11
|
|
12
|
+
#define term_set_create() \
|
13
|
+
hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
|
14
|
+
|
12
15
|
/***************************************************************************
|
13
16
|
*
|
14
17
|
* Explanation
|
@@ -24,11 +27,11 @@ typedef struct Explanation {
|
|
24
27
|
int dcapa;
|
25
28
|
} Explanation;
|
26
29
|
|
27
|
-
Explanation *expl_create(float value, char *description);
|
28
|
-
void expl_destoy(void *p);
|
29
|
-
Explanation *expl_add_detail(Explanation *self, Explanation *detail);
|
30
|
-
char *expl_to_s(Explanation *self, int depth);
|
31
|
-
char *expl_to_html(Explanation *self);
|
30
|
+
extern Explanation *expl_create(float value, char *description);
|
31
|
+
extern void expl_destoy(void *p);
|
32
|
+
extern Explanation *expl_add_detail(Explanation *self, Explanation *detail);
|
33
|
+
extern char *expl_to_s(Explanation *self, int depth);
|
34
|
+
extern char *expl_to_html(Explanation *self);
|
32
35
|
|
33
36
|
/***************************************************************************
|
34
37
|
*
|
@@ -41,7 +44,7 @@ typedef struct Hit {
|
|
41
44
|
float score;
|
42
45
|
} Hit;
|
43
46
|
|
44
|
-
bool hit_less_than(void *p1, void *p2);
|
47
|
+
extern bool hit_less_than(void *p1, void *p2);
|
45
48
|
|
46
49
|
/***************************************************************************
|
47
50
|
*
|
@@ -55,9 +58,9 @@ typedef struct TopDocs {
|
|
55
58
|
Hit **hits;
|
56
59
|
} TopDocs;
|
57
60
|
|
58
|
-
TopDocs *td_create(int total_hits, int size, Hit **hits);
|
59
|
-
void td_destroy(
|
60
|
-
char *td_to_s(TopDocs *td);
|
61
|
+
extern TopDocs *td_create(int total_hits, int size, Hit **hits);
|
62
|
+
extern void td_destroy(TopDocs *td);
|
63
|
+
extern char *td_to_s(TopDocs *td);
|
61
64
|
|
62
65
|
/***************************************************************************
|
63
66
|
*
|
@@ -71,13 +74,17 @@ typedef struct Filter {
|
|
71
74
|
HshTable *cache;
|
72
75
|
BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
|
73
76
|
char *(*to_s)(struct Filter *self);
|
74
|
-
|
77
|
+
uint (*hash)(struct Filter *self);
|
78
|
+
int (*eq)(struct Filter *self, struct Filter *o);
|
79
|
+
void (*destroy)(struct Filter *self);
|
75
80
|
} Filter;
|
76
81
|
|
77
|
-
Filter *filt_create(char *name);
|
78
|
-
char *
|
79
|
-
BitVector *filt_get_bv(Filter *self, IndexReader *ir);
|
80
|
-
void filt_destroy(void *p);
|
82
|
+
extern Filter *filt_create(char *name);
|
83
|
+
extern char *filt_to_s_i(Filter *self);
|
84
|
+
extern BitVector *filt_get_bv(Filter *self, IndexReader *ir);
|
85
|
+
extern void filt_destroy(Filter *self);
|
86
|
+
extern uint filt_hash(Filter *self);
|
87
|
+
extern int filt_eq(Filter *self, Filter *o);
|
81
88
|
|
82
89
|
/***************************************************************************
|
83
90
|
*
|
@@ -85,9 +92,8 @@ void filt_destroy(void *p);
|
|
85
92
|
*
|
86
93
|
***************************************************************************/
|
87
94
|
|
88
|
-
Filter *rfilt_create(const char *field, char *lower_term,
|
89
|
-
bool include_lower, bool include_upper);
|
90
|
-
void rfilt_destroy(void *p);
|
95
|
+
extern Filter *rfilt_create(const char *field, char *lower_term,
|
96
|
+
char *upper_term, bool include_lower, bool include_upper);
|
91
97
|
|
92
98
|
/***************************************************************************
|
93
99
|
*
|
@@ -99,8 +105,7 @@ typedef struct QueryFilter {
|
|
99
105
|
Query *query;
|
100
106
|
} QueryFilter;
|
101
107
|
|
102
|
-
Filter *qfilt_create(Query *query);
|
103
|
-
|
108
|
+
extern Filter *qfilt_create(Query *query);
|
104
109
|
|
105
110
|
/***************************************************************************
|
106
111
|
*
|
@@ -123,13 +128,16 @@ struct Weight {
|
|
123
128
|
Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
|
124
129
|
float (*sum_of_squared_weights)(Weight *self);
|
125
130
|
char *(*to_s)(Weight *self);
|
126
|
-
void (*destroy)(
|
131
|
+
void (*destroy)(Weight *self);
|
127
132
|
};
|
128
133
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
134
|
+
extern Weight *w_create(Query *query);
|
135
|
+
extern void w_destroy(Weight *self);
|
136
|
+
|
137
|
+
extern Query *w_get_query(Weight *self);
|
138
|
+
extern float w_get_value(Weight *self);
|
139
|
+
extern float w_sum_of_squared_weights(Weight *self);
|
140
|
+
extern void w_normalize(Weight *self, float normalization_factor);
|
133
141
|
|
134
142
|
/***************************************************************************
|
135
143
|
*
|
@@ -137,7 +145,7 @@ void w_normalize(Weight *self, float normalization_factor);
|
|
137
145
|
*
|
138
146
|
***************************************************************************/
|
139
147
|
|
140
|
-
Weight *tw_create(Query *query, Searcher *searcher);
|
148
|
+
extern Weight *tw_create(Query *query, Searcher *searcher);
|
141
149
|
|
142
150
|
/***************************************************************************
|
143
151
|
*
|
@@ -149,7 +157,8 @@ typedef struct BooleanWeight {
|
|
149
157
|
Weight **weights;
|
150
158
|
int w_cnt;
|
151
159
|
} BooleanWeight;
|
152
|
-
|
160
|
+
|
161
|
+
extern Weight *bw_create(Query *query, Searcher *searcher);
|
153
162
|
|
154
163
|
/***************************************************************************
|
155
164
|
*
|
@@ -157,7 +166,7 @@ Weight *bw_create(Query *query, Searcher *searcher);
|
|
157
166
|
*
|
158
167
|
***************************************************************************/
|
159
168
|
|
160
|
-
Weight *phw_create(Query *query, Searcher *searcher);
|
169
|
+
extern Weight *phw_create(Query *query, Searcher *searcher);
|
161
170
|
|
162
171
|
/***************************************************************************
|
163
172
|
*
|
@@ -165,7 +174,7 @@ Weight *phw_create(Query *query, Searcher *searcher);
|
|
165
174
|
*
|
166
175
|
***************************************************************************/
|
167
176
|
|
168
|
-
Weight *csw_create(Query *query, Searcher *searcher);
|
177
|
+
extern Weight *csw_create(Query *query, Searcher *searcher);
|
169
178
|
|
170
179
|
/***************************************************************************
|
171
180
|
*
|
@@ -173,7 +182,7 @@ Weight *csw_create(Query *query, Searcher *searcher);
|
|
173
182
|
*
|
174
183
|
***************************************************************************/
|
175
184
|
|
176
|
-
Weight *maw_create(Query *query, Searcher *searcher);
|
185
|
+
extern Weight *maw_create(Query *query, Searcher *searcher);
|
177
186
|
|
178
187
|
/***************************************************************************
|
179
188
|
*
|
@@ -181,7 +190,7 @@ Weight *maw_create(Query *query, Searcher *searcher);
|
|
181
190
|
*
|
182
191
|
***************************************************************************/
|
183
192
|
|
184
|
-
Weight *spanw_create(Query *query, Searcher *searcher);
|
193
|
+
extern Weight *spanw_create(Query *query, Searcher *searcher);
|
185
194
|
|
186
195
|
/***************************************************************************
|
187
196
|
*
|
@@ -211,24 +220,32 @@ enum QUERY_TYPE {
|
|
211
220
|
struct Query {
|
212
221
|
bool destroy_all : 1;
|
213
222
|
uchar type;
|
223
|
+
int ref_cnt;
|
214
224
|
void *data;
|
215
225
|
float boost;
|
216
|
-
float original_boost;
|
217
226
|
Weight *weight;
|
218
|
-
Query *rewritten;
|
219
|
-
Weight *(*create_weight)(Query *self, Searcher *searcher);
|
220
227
|
Query *(*rewrite)(Query *self, IndexReader *ir);
|
221
|
-
void (*extract_terms)(Query *self,
|
228
|
+
void (*extract_terms)(Query *self, HashSet *terms);
|
222
229
|
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
223
230
|
char *(*to_s)(Query *self, char *field);
|
224
|
-
|
231
|
+
uint (*hash)(Query *self);
|
232
|
+
int (*eq)(Query *self, Query *o);
|
233
|
+
void (*destroy_i)(Query *self);
|
234
|
+
Weight *(*create_weight_i)(Query *self, Searcher *searcher);
|
225
235
|
};
|
226
236
|
|
227
|
-
|
228
|
-
|
229
|
-
Similarity *
|
230
|
-
void
|
231
|
-
Query *
|
237
|
+
/* Internal Query Functions */
|
238
|
+
extern Query *q_create();
|
239
|
+
extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
|
240
|
+
extern void q_destroy_i(Query *self);
|
241
|
+
extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
|
242
|
+
|
243
|
+
|
244
|
+
extern void q_deref(Query *self);
|
245
|
+
extern Weight *q_weight(Query *self, Searcher *searcher);
|
246
|
+
extern Query *q_combine(Query **queries, int q_cnt);
|
247
|
+
extern uint q_hash(Query *self);
|
248
|
+
extern int q_eq(Query *self, Query *o);
|
232
249
|
|
233
250
|
/***************************************************************************
|
234
251
|
*
|
@@ -240,7 +257,7 @@ typedef struct TermQuery {
|
|
240
257
|
Term *term;
|
241
258
|
} TermQuery;
|
242
259
|
|
243
|
-
Query *tq_create(Term *term);
|
260
|
+
extern Query *tq_create(Term *term);
|
244
261
|
|
245
262
|
/***************************************************************************
|
246
263
|
*
|
@@ -259,6 +276,7 @@ enum BC_TYPE {
|
|
259
276
|
};
|
260
277
|
|
261
278
|
typedef struct BooleanClause {
|
279
|
+
int ref_cnt;
|
262
280
|
Query *query;
|
263
281
|
Query *rewritten;
|
264
282
|
unsigned int occur : 4;
|
@@ -266,9 +284,9 @@ typedef struct BooleanClause {
|
|
266
284
|
bool is_required : 1;
|
267
285
|
} BooleanClause;
|
268
286
|
|
269
|
-
BooleanClause *bc_create(Query *query, unsigned int occur);
|
270
|
-
void
|
271
|
-
void bc_set_occur(BooleanClause *self, unsigned int occur);
|
287
|
+
extern BooleanClause *bc_create(Query *query, unsigned int occur);
|
288
|
+
extern void bc_deref(BooleanClause *self);
|
289
|
+
extern void bc_set_occur(BooleanClause *self, unsigned int occur);
|
272
290
|
|
273
291
|
/***************************************************************************
|
274
292
|
* BooleanQuery
|
@@ -283,13 +301,15 @@ typedef struct BooleanQuery {
|
|
283
301
|
int max_clause_cnt;
|
284
302
|
int clause_cnt;
|
285
303
|
int clause_capa;
|
304
|
+
float original_boost;
|
286
305
|
BooleanClause **clauses;
|
287
306
|
Similarity *similarity;
|
288
307
|
} BooleanQuery;
|
289
308
|
|
290
|
-
Query *bq_create(bool coord_disabled);
|
291
|
-
BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
292
|
-
|
309
|
+
extern Query *bq_create(bool coord_disabled);
|
310
|
+
extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
311
|
+
unsigned int occur);
|
312
|
+
extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
293
313
|
|
294
314
|
/***************************************************************************
|
295
315
|
*
|
@@ -307,8 +327,8 @@ typedef struct PhraseQuery {
|
|
307
327
|
char *field;
|
308
328
|
} PhraseQuery;
|
309
329
|
|
310
|
-
Query *phq_create();
|
311
|
-
void phq_add_term(Query *self, Term *term, int pos_inc);
|
330
|
+
extern Query *phq_create();
|
331
|
+
extern void phq_add_term(Query *self, Term *term, int pos_inc);
|
312
332
|
|
313
333
|
/***************************************************************************
|
314
334
|
*
|
@@ -326,8 +346,8 @@ typedef struct MultiPhraseQuery {
|
|
326
346
|
char *field;
|
327
347
|
} MultiPhraseQuery;
|
328
348
|
|
329
|
-
Query *mphq_create();
|
330
|
-
void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
|
349
|
+
extern Query *mphq_create();
|
350
|
+
extern void mphq_add_terms(Query *self, Term **ts, int t_cnt, int pos_inc);
|
331
351
|
|
332
352
|
/***************************************************************************
|
333
353
|
*
|
@@ -335,7 +355,7 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
|
|
335
355
|
*
|
336
356
|
***************************************************************************/
|
337
357
|
|
338
|
-
Query *prefixq_create(Term *prefix);
|
358
|
+
extern Query *prefixq_create(Term *prefix);
|
339
359
|
|
340
360
|
/***************************************************************************
|
341
361
|
*
|
@@ -345,8 +365,9 @@ Query *prefixq_create(Term *prefix);
|
|
345
365
|
|
346
366
|
#define WILD_CHAR '?'
|
347
367
|
#define WILD_STRING '*'
|
348
|
-
|
349
|
-
|
368
|
+
|
369
|
+
extern Query *wcq_create(Term *term);
|
370
|
+
extern bool wc_match(char *pattern, char *text);
|
350
371
|
|
351
372
|
/***************************************************************************
|
352
373
|
*
|
@@ -370,8 +391,8 @@ typedef struct FuzzyQuery {
|
|
370
391
|
int da_capa;
|
371
392
|
} FuzzyQuery;
|
372
393
|
|
373
|
-
Query *fuzq_create(Term *term);
|
374
|
-
Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
|
394
|
+
extern Query *fuzq_create(Term *term);
|
395
|
+
extern Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
|
375
396
|
|
376
397
|
/***************************************************************************
|
377
398
|
*
|
@@ -379,7 +400,7 @@ Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
|
|
379
400
|
*
|
380
401
|
***************************************************************************/
|
381
402
|
|
382
|
-
Query *csq_create(Filter *filter);
|
403
|
+
extern Query *csq_create(Filter *filter);
|
383
404
|
|
384
405
|
/***************************************************************************
|
385
406
|
*
|
@@ -387,7 +408,7 @@ Query *csq_create(Filter *filter);
|
|
387
408
|
*
|
388
409
|
***************************************************************************/
|
389
410
|
|
390
|
-
Query *fq_create(Query *query, Filter *filter);
|
411
|
+
extern Query *fq_create(Query *query, Filter *filter);
|
391
412
|
|
392
413
|
/***************************************************************************
|
393
414
|
*
|
@@ -395,7 +416,7 @@ Query *fq_create(Query *query, Filter *filter);
|
|
395
416
|
*
|
396
417
|
***************************************************************************/
|
397
418
|
|
398
|
-
Query *maq_create();
|
419
|
+
extern Query *maq_create();
|
399
420
|
|
400
421
|
/***************************************************************************
|
401
422
|
*
|
@@ -407,14 +428,16 @@ typedef struct Range {
|
|
407
428
|
char *field;
|
408
429
|
char *lower_term;
|
409
430
|
char *upper_term;
|
410
|
-
bool include_lower;
|
411
|
-
bool include_upper;
|
431
|
+
bool include_lower : 1;
|
432
|
+
bool include_upper : 1;
|
412
433
|
} Range;
|
413
434
|
|
414
|
-
Query *rq_create(const char *field, char *lower_term,
|
415
|
-
bool include_lower, bool include_upper);
|
416
|
-
Query *rq_create_less(const char *field, char *upper_term,
|
417
|
-
|
435
|
+
extern Query *rq_create(const char *field, char *lower_term,
|
436
|
+
char *upper_term, bool include_lower, bool include_upper);
|
437
|
+
extern Query *rq_create_less(const char *field, char *upper_term,
|
438
|
+
bool include_upper);
|
439
|
+
extern Query *rq_create_more(const char *field, char *lower_term,
|
440
|
+
bool include_lower);
|
418
441
|
|
419
442
|
/***************************************************************************
|
420
443
|
*
|
@@ -447,7 +470,7 @@ struct SpanEnum {
|
|
447
470
|
int (*start)(SpanEnum *self);
|
448
471
|
int (*end)(SpanEnum *self);
|
449
472
|
char *(*to_s)(SpanEnum *self);
|
450
|
-
void (*destroy)(
|
473
|
+
void (*destroy)(SpanEnum *self);
|
451
474
|
};
|
452
475
|
|
453
476
|
/***************************************************************************
|
@@ -463,13 +486,13 @@ struct SpanTermEnum {
|
|
463
486
|
int freq;
|
464
487
|
};
|
465
488
|
|
466
|
-
SpanEnum *spante_create(Query *query, IndexReader *ir);
|
489
|
+
extern SpanEnum *spante_create(Query *query, IndexReader *ir);
|
467
490
|
|
468
491
|
/***************************************************************************
|
469
492
|
* SpanFirstEnum
|
470
493
|
***************************************************************************/
|
471
494
|
|
472
|
-
SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
495
|
+
extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
473
496
|
|
474
497
|
/***************************************************************************
|
475
498
|
* SpanOrEnum
|
@@ -481,7 +504,8 @@ typedef struct SpanOrEnum {
|
|
481
504
|
int s_cnt;
|
482
505
|
bool first_time;
|
483
506
|
} SpanOrEnum;
|
484
|
-
|
507
|
+
|
508
|
+
extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
485
509
|
|
486
510
|
/***************************************************************************
|
487
511
|
* SpanEnumCell
|
@@ -493,7 +517,8 @@ typedef struct SpanEnumCell {
|
|
493
517
|
int index;
|
494
518
|
int length;
|
495
519
|
} SpanEnumCell;
|
496
|
-
|
520
|
+
|
521
|
+
extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
497
522
|
|
498
523
|
/***************************************************************************
|
499
524
|
* SpanNearEnum
|
@@ -511,7 +536,7 @@ typedef struct SpanNearEnum {
|
|
511
536
|
int end;
|
512
537
|
} SpanNearEnum;
|
513
538
|
|
514
|
-
SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
539
|
+
extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
515
540
|
|
516
541
|
/***************************************************************************
|
517
542
|
* SpanNotEnum
|
@@ -524,7 +549,7 @@ typedef struct SpanNotEnum {
|
|
524
549
|
bool more_exc : 1;
|
525
550
|
} SpanNotEnum;
|
526
551
|
|
527
|
-
SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
552
|
+
extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
528
553
|
|
529
554
|
/***************************************************************************
|
530
555
|
* SpanQuery
|
@@ -535,14 +560,14 @@ struct SpanQuery {
|
|
535
560
|
void *data;
|
536
561
|
char *field;
|
537
562
|
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
538
|
-
|
563
|
+
HashSet *(*get_terms)(Query *self);
|
539
564
|
};
|
540
565
|
|
541
566
|
/***************************************************************************
|
542
567
|
* SpanTermQuery
|
543
568
|
***************************************************************************/
|
544
569
|
|
545
|
-
Query *spantq_create(Term *term);
|
570
|
+
extern Query *spantq_create(Term *term);
|
546
571
|
|
547
572
|
/***************************************************************************
|
548
573
|
* SpanFirstQuery
|
@@ -553,7 +578,7 @@ typedef struct SpanFirstQuery {
|
|
553
578
|
Query *match;
|
554
579
|
} SpanFirstQuery;
|
555
580
|
|
556
|
-
Query *spanfq_create(Query *match, int end);
|
581
|
+
extern Query *spanfq_create(Query *match, int end);
|
557
582
|
|
558
583
|
/***************************************************************************
|
559
584
|
* SpanOrQuery
|
@@ -564,7 +589,7 @@ typedef struct SpanOrQuery {
|
|
564
589
|
int c_cnt;
|
565
590
|
} SpanOrQuery;
|
566
591
|
|
567
|
-
Query *spanoq_create(Query **clauses, int c_cnt);
|
592
|
+
extern Query *spanoq_create(Query **clauses, int c_cnt);
|
568
593
|
|
569
594
|
/***************************************************************************
|
570
595
|
* SpanNearQuery
|
@@ -577,7 +602,8 @@ typedef struct SpanNearQuery {
|
|
577
602
|
bool in_order;
|
578
603
|
} SpanNearQuery;
|
579
604
|
|
580
|
-
Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
605
|
+
extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
606
|
+
bool in_order);
|
581
607
|
|
582
608
|
|
583
609
|
/***************************************************************************
|
@@ -589,7 +615,7 @@ typedef struct SpanNotQuery {
|
|
589
615
|
Query *exc;
|
590
616
|
} SpanNotQuery;
|
591
617
|
|
592
|
-
Query *spanxq_create(Query *inc, Query *exc);
|
618
|
+
extern Query *spanxq_create(Query *inc, Query *exc);
|
593
619
|
|
594
620
|
/***************************************************************************
|
595
621
|
*
|
@@ -607,14 +633,16 @@ struct Scorer {
|
|
607
633
|
bool (*next)(Scorer *self);
|
608
634
|
bool (*skip_to)(Scorer *self, int doc_num);
|
609
635
|
Explanation *(*explain)(Scorer *self, int doc_num);
|
610
|
-
void (*destroy)(
|
636
|
+
void (*destroy)(Scorer *self);
|
611
637
|
};
|
612
638
|
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
639
|
+
/* Internal Scorer Function */
|
640
|
+
extern void scorer_destroy_i(Scorer *self);
|
641
|
+
|
642
|
+
extern Scorer *scorer_create(Similarity *similarity);
|
643
|
+
extern bool scorer_less_than(void *p1, void *p2);
|
644
|
+
extern bool scorer_doc_less_than(void *p1, void *p2);
|
645
|
+
extern int scorer_doc_cmp(const void *p1, const void *p2);
|
618
646
|
|
619
647
|
/***************************************************************************
|
620
648
|
*
|
@@ -637,7 +665,7 @@ typedef struct TermScorer {
|
|
637
665
|
float weight_value;
|
638
666
|
} TermScorer;
|
639
667
|
|
640
|
-
Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
668
|
+
extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
641
669
|
|
642
670
|
/***************************************************************************
|
643
671
|
*
|
@@ -656,9 +684,6 @@ typedef struct Coordinator {
|
|
656
684
|
int num_matches;
|
657
685
|
} Coordinator;
|
658
686
|
|
659
|
-
Coordinator *coo_create(Similarity *similarity);
|
660
|
-
Coordinator *coo_init(Coordinator *self);
|
661
|
-
|
662
687
|
/***************************************************************************
|
663
688
|
* DisjunctionSumScorer
|
664
689
|
***************************************************************************/
|
@@ -680,7 +705,7 @@ typedef struct DisjunctionSumScorer{
|
|
680
705
|
typedef struct ConjunctionScorer{
|
681
706
|
bool first_time : 1;
|
682
707
|
bool more : 1;
|
683
|
-
|
708
|
+
float coord;
|
684
709
|
int ss_cnt;
|
685
710
|
int ss_capa;
|
686
711
|
Scorer **sub_scorers;
|
@@ -737,8 +762,8 @@ typedef struct BooleanScorer {
|
|
737
762
|
Coordinator *coordinator;
|
738
763
|
} BooleanScorer;
|
739
764
|
|
740
|
-
Scorer *bsc_create(Similarity *similarity);
|
741
|
-
void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
765
|
+
extern Scorer *bsc_create(Similarity *similarity);
|
766
|
+
extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
742
767
|
|
743
768
|
/***************************************************************************
|
744
769
|
*
|
@@ -757,7 +782,8 @@ typedef struct PhrasePosition {
|
|
757
782
|
int position;
|
758
783
|
} PhrasePosition;
|
759
784
|
|
760
|
-
PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
785
|
+
extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
786
|
+
|
761
787
|
/***************************************************************************
|
762
788
|
* PhraseScorer
|
763
789
|
***************************************************************************/
|
@@ -777,22 +803,24 @@ typedef struct PhraseScorer {
|
|
777
803
|
int slop;
|
778
804
|
} PhraseScorer;
|
779
805
|
|
780
|
-
Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
806
|
+
extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
781
807
|
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
782
808
|
|
783
809
|
/***************************************************************************
|
784
810
|
* ExactPhraseScorer
|
785
811
|
***************************************************************************/
|
786
812
|
|
787
|
-
Scorer *exact_phrase_scorer_create(Weight *weight,
|
788
|
-
int *positions, int t_cnt,
|
813
|
+
extern Scorer *exact_phrase_scorer_create(Weight *weight,
|
814
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
815
|
+
Similarity *similarity, uchar *norms);
|
789
816
|
|
790
817
|
/***************************************************************************
|
791
818
|
* SloppyPhraseScorer
|
792
819
|
***************************************************************************/
|
793
820
|
|
794
|
-
Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
795
|
-
|
821
|
+
extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
822
|
+
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
823
|
+
Similarity *similarity, int slop, uchar *norms);
|
796
824
|
|
797
825
|
/***************************************************************************
|
798
826
|
*
|
@@ -805,7 +833,7 @@ typedef struct ConstantScoreScorer {
|
|
805
833
|
float score;
|
806
834
|
} ConstantScoreScorer;
|
807
835
|
|
808
|
-
Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
836
|
+
extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
809
837
|
|
810
838
|
|
811
839
|
/***************************************************************************
|
@@ -820,7 +848,7 @@ typedef struct MatchAllScorer {
|
|
820
848
|
float score;
|
821
849
|
} MatchAllScorer;
|
822
850
|
|
823
|
-
Scorer *masc_create(Weight *weight, IndexReader *ir);
|
851
|
+
extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
824
852
|
|
825
853
|
|
826
854
|
/***************************************************************************
|
@@ -841,7 +869,7 @@ typedef struct SpanScorer {
|
|
841
869
|
float freq;
|
842
870
|
} SpanScorer;
|
843
871
|
|
844
|
-
Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
872
|
+
extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
845
873
|
|
846
874
|
/***************************************************************************
|
847
875
|
*
|
@@ -874,14 +902,15 @@ typedef struct SortField {
|
|
874
902
|
void (*handle_term)(void *index, TermDocEnum *tde, char *text);
|
875
903
|
} SortField;
|
876
904
|
|
877
|
-
SortField *sort_field_create(char *field, int type, bool reverse);
|
878
|
-
SortField *sort_field_score_create(bool reverse);
|
879
|
-
SortField *sort_field_doc_create(bool reverse);
|
880
|
-
SortField *sort_field_int_create(char *field, bool reverse);
|
881
|
-
SortField *sort_field_float_create(char *field, bool reverse);
|
882
|
-
SortField *sort_field_string_create(char *field, bool reverse);
|
883
|
-
SortField *sort_field_auto_create(char *field, bool reverse);
|
884
|
-
void sort_field_destroy(void *p);
|
905
|
+
extern SortField *sort_field_create(char *field, int type, bool reverse);
|
906
|
+
extern SortField *sort_field_score_create(bool reverse);
|
907
|
+
extern SortField *sort_field_doc_create(bool reverse);
|
908
|
+
extern SortField *sort_field_int_create(char *field, bool reverse);
|
909
|
+
extern SortField *sort_field_float_create(char *field, bool reverse);
|
910
|
+
extern SortField *sort_field_string_create(char *field, bool reverse);
|
911
|
+
extern SortField *sort_field_auto_create(char *field, bool reverse);
|
912
|
+
extern void sort_field_destroy(void *p);
|
913
|
+
extern char *sort_field_to_s(SortField *self);
|
885
914
|
|
886
915
|
extern SortField SORT_FIELD_SCORE;
|
887
916
|
extern SortField SORT_FIELD_SCORE_REV;
|
@@ -899,20 +928,21 @@ typedef struct Sort {
|
|
899
928
|
bool destroy_all : 1;
|
900
929
|
} Sort;
|
901
930
|
|
902
|
-
Sort *sort_create();
|
903
|
-
void sort_destroy(void *p);
|
904
|
-
void sort_add_sort_field(Sort *self, SortField *sf);
|
905
|
-
void sort_clear(Sort *self);
|
931
|
+
extern Sort *sort_create();
|
932
|
+
extern void sort_destroy(void *p);
|
933
|
+
extern void sort_add_sort_field(Sort *self, SortField *sf);
|
934
|
+
extern void sort_clear(Sort *self);
|
935
|
+
extern char *sort_to_s(Sort *self);
|
906
936
|
|
907
937
|
/***************************************************************************
|
908
938
|
* FieldSortedHitQueue
|
909
939
|
***************************************************************************/
|
910
940
|
|
911
|
-
Hit *fshq_pq_pop(PriorityQueue *pq);
|
912
|
-
void fshq_pq_down(PriorityQueue *pq);
|
913
|
-
void
|
914
|
-
void fshq_pq_destroy(
|
915
|
-
PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
941
|
+
extern Hit *fshq_pq_pop(PriorityQueue *pq);
|
942
|
+
extern void fshq_pq_down(PriorityQueue *pq);
|
943
|
+
extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
|
944
|
+
extern void fshq_pq_destroy(PriorityQueue *pq);
|
945
|
+
extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
916
946
|
|
917
947
|
/***************************************************************************
|
918
948
|
*
|
@@ -921,6 +951,7 @@ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
921
951
|
***************************************************************************/
|
922
952
|
|
923
953
|
struct Searcher {
|
954
|
+
void *data;
|
924
955
|
IndexReader *ir;
|
925
956
|
Similarity *similarity;
|
926
957
|
bool close_ir : 1;
|
@@ -930,25 +961,53 @@ struct Searcher {
|
|
930
961
|
int (*max_doc)(Searcher *self);
|
931
962
|
Weight *(*create_weight)(Searcher *self, Query *query);
|
932
963
|
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
933
|
-
|
964
|
+
int num_docs, Filter *filter, Sort *sort);
|
965
|
+
void (*search_each)(Searcher *self, Query *query, Filter *filter,
|
966
|
+
void (*fn)(Searcher *, int, float, void *), void *arg);
|
967
|
+
void (*search_each_w)(Searcher *self, Weight *weight,
|
968
|
+
Filter *filter, void (*fn)(Searcher *, int, float, void *),
|
969
|
+
void *arg);
|
934
970
|
Query *(*rewrite)(Searcher *self, Query *original);
|
935
971
|
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
972
|
+
Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
|
936
973
|
Similarity *(*get_similarity)(Searcher *self);
|
937
974
|
void (*close)(Searcher *self);
|
938
975
|
};
|
939
976
|
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
977
|
+
#define sea_doc_freq(s, t) s->doc_freq(s, t)
|
978
|
+
#define sea_doc_freqs(s, t, c) s->doc_freqs(s, t, c)
|
979
|
+
#define sea_get_doc(s, dn) s->get_doc(s, dn)
|
980
|
+
#define sea_max_doc(s) s->max_doc(s)
|
981
|
+
#define sea_search(s, q, fd, nd, filt, sort)\
|
982
|
+
s->search(s, q, fd, nd, filt, sort)
|
983
|
+
#define sea_search_each(s, q, filt, fn, arg)\
|
984
|
+
s->search_each(s, q, filt, fn, arg)
|
985
|
+
#define sea_search_each_w(s, q, filt, fn, arg)\
|
986
|
+
s->search_each_w(s, q, filt, fn, arg)
|
987
|
+
#define sea_rewrite(s, q) s->rewrite(s, q)
|
988
|
+
#define sea_explain(s, q, dn) s->explain(s, q, dn)
|
989
|
+
#define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
|
990
|
+
#define sea_get_similarity(s) s->get_similarity(s)
|
991
|
+
#define sea_close(s) s->close(s)
|
992
|
+
|
993
|
+
extern Searcher *sea_create(IndexReader *ir);
|
994
|
+
|
995
|
+
/***************************************************************************
|
996
|
+
*
|
997
|
+
* MultiSearcher
|
998
|
+
*
|
999
|
+
***************************************************************************/
|
1000
|
+
|
1001
|
+
typedef struct MultiSearcher {
|
1002
|
+
int s_cnt;
|
1003
|
+
Searcher **searchers;
|
1004
|
+
int *starts;
|
1005
|
+
int max_doc;
|
1006
|
+
bool close_subs : 1;
|
1007
|
+
} MultiSearcher;
|
1008
|
+
|
1009
|
+
extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
1010
|
+
bool close_subs);
|
952
1011
|
|
953
1012
|
/***************************************************************************
|
954
1013
|
*
|
@@ -979,10 +1038,11 @@ typedef struct QParser {
|
|
979
1038
|
Query *result;
|
980
1039
|
} QParser;
|
981
1040
|
|
982
|
-
QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
|
983
|
-
|
984
|
-
|
985
|
-
|
1041
|
+
extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
|
1042
|
+
Analyzer *analyzer);
|
1043
|
+
extern void qp_destroy(QParser *self);
|
1044
|
+
extern Query *qp_parse(QParser *self, char *qstr);
|
1045
|
+
extern char *qp_clean_str(char *str);
|
986
1046
|
|
987
1047
|
/***************************************************************************
|
988
1048
|
*
|
@@ -1001,8 +1061,6 @@ typedef struct Index {
|
|
1001
1061
|
HashSet *key;
|
1002
1062
|
char *id_field;
|
1003
1063
|
char *def_field;
|
1004
|
-
bool close_analyzer : 1;
|
1005
|
-
bool close_store : 1;
|
1006
1064
|
/* for IndexWriter */
|
1007
1065
|
bool use_compound_file : 1;
|
1008
1066
|
bool auto_flush : 1;
|
@@ -1010,32 +1068,38 @@ typedef struct Index {
|
|
1010
1068
|
bool check_latest : 1;
|
1011
1069
|
} Index;
|
1012
1070
|
|
1013
|
-
Index *index_create(Store *store, Analyzer *analyzer,
|
1014
|
-
bool create);
|
1015
|
-
void index_destroy(Index *self);
|
1016
|
-
void index_flush(Index *self);
|
1017
|
-
int index_size(Index *self);
|
1018
|
-
void index_optimize(Index *self);
|
1019
|
-
bool index_has_del(Index *self);
|
1020
|
-
bool index_is_deleted(Index *self, int doc_num);
|
1021
|
-
void index_add_doc(Index *self, Document *doc);
|
1022
|
-
void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
1023
|
-
void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1024
|
-
void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1025
|
-
TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1071
|
+
extern Index *index_create(Store *store, Analyzer *analyzer,
|
1072
|
+
HashSet *def_fields, bool create);
|
1073
|
+
extern void index_destroy(Index *self);
|
1074
|
+
extern void index_flush(Index *self);
|
1075
|
+
extern int index_size(Index *self);
|
1076
|
+
extern void index_optimize(Index *self);
|
1077
|
+
extern bool index_has_del(Index *self);
|
1078
|
+
extern bool index_is_deleted(Index *self, int doc_num);
|
1079
|
+
extern void index_add_doc(Index *self, Document *doc);
|
1080
|
+
extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
1081
|
+
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1082
|
+
extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1083
|
+
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1026
1084
|
int num_docs, Filter *filter, Sort *sort);
|
1027
|
-
Query *index_get_query(Index *self, char *qstr);
|
1028
|
-
Document *index_get_doc(Index *self, int doc_num);
|
1029
|
-
Document *index_get_doc_ts(Index *self, int doc_num);
|
1030
|
-
Document *index_get_doc_id(Index *self, char *id);
|
1031
|
-
Document *index_get_doc_term(Index *self, Term *term);
|
1032
|
-
void index_delete(Index *self, int doc_num);
|
1033
|
-
void index_delete_term(Index *self, Term *term);
|
1034
|
-
void index_delete_id(Index *self, char *id);
|
1035
|
-
void index_delete_query(Index *self, Query *q, Filter *f);
|
1036
|
-
void index_delete_query_str(Index *self, char *qstr, Filter *f);
|
1037
|
-
int index_term_id(Index *self, Term *term);
|
1038
|
-
Explanation *index_explain(Index *self, Query *q, int doc_num);
|
1039
|
-
void index_auto_flush_ir(Index *self);
|
1040
|
-
void index_auto_flush_iw(Index *self);
|
1085
|
+
extern Query *index_get_query(Index *self, char *qstr);
|
1086
|
+
extern Document *index_get_doc(Index *self, int doc_num);
|
1087
|
+
extern Document *index_get_doc_ts(Index *self, int doc_num);
|
1088
|
+
extern Document *index_get_doc_id(Index *self, char *id);
|
1089
|
+
extern Document *index_get_doc_term(Index *self, Term *term);
|
1090
|
+
extern void index_delete(Index *self, int doc_num);
|
1091
|
+
extern void index_delete_term(Index *self, Term *term);
|
1092
|
+
extern void index_delete_id(Index *self, char *id);
|
1093
|
+
extern void index_delete_query(Index *self, Query *q, Filter *f);
|
1094
|
+
extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
|
1095
|
+
extern int index_term_id(Index *self, Term *term);
|
1096
|
+
extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
1097
|
+
extern void index_auto_flush_ir(Index *self);
|
1098
|
+
extern void index_auto_flush_iw(Index *self);
|
1099
|
+
|
1100
|
+
extern inline void ensure_searcher_open(Index *self);
|
1101
|
+
extern inline void ensure_reader_open(Index *self);
|
1102
|
+
extern inline void ensure_writer_open(Index *self);
|
1103
|
+
|
1041
1104
|
#endif
|
1105
|
+
|