ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/ext/search.h
ADDED
@@ -0,0 +1,1065 @@
|
|
1
|
+
#ifndef FRT_SEARCH_H
|
2
|
+
#define FRT_SEARCH_H
|
3
|
+
|
4
|
+
typedef struct Similarity Similarity;
|
5
|
+
typedef struct Query Query;
|
6
|
+
typedef struct Weight Weight;
|
7
|
+
typedef struct Scorer Scorer;
|
8
|
+
typedef struct Searcher Searcher;
|
9
|
+
|
10
|
+
#include "index.h"
|
11
|
+
#include "bitvector.h"
|
12
|
+
|
13
|
+
/***************************************************************************
|
14
|
+
*
|
15
|
+
* Similarity
|
16
|
+
*
|
17
|
+
***************************************************************************/
|
18
|
+
|
19
|
+
struct Similarity {
|
20
|
+
void *data;
|
21
|
+
float norm_table[256];
|
22
|
+
float (*length_norm)(Similarity *self, char *field, int num_terms);
|
23
|
+
float (*query_norm)(Similarity *self, float sum_of_squared_weights);
|
24
|
+
float (*tf)(Similarity *self, float freq);
|
25
|
+
float (*sloppy_freq)(Similarity *self, int distance);
|
26
|
+
float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
|
27
|
+
float (*idf_phrase)(Similarity *self, Term **terms, int tcnt, Searcher *searcher);
|
28
|
+
float (*idf)(Similarity *self, int doc_freq, int num_docs);
|
29
|
+
float (*coord)(Similarity *self, int overlap, int max_overlap);
|
30
|
+
float (*decode_norm)(Similarity *self, uchar b);
|
31
|
+
float (*encode_norm)(Similarity *self, float f);
|
32
|
+
void (*destroy)(void *p);
|
33
|
+
};
|
34
|
+
|
35
|
+
/* Calls msim's length_norm function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_length_norm(msim, field, num_terms) ((msim)->length_norm((msim), (field), (num_terms)))
|
36
|
+
/* Calls msim's query_norm function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_query_norm(msim, sosw) ((msim)->query_norm((msim), (sosw)))
|
37
|
+
/* Calls msim's tf function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_tf(msim, freq) ((msim)->tf((msim), (freq)))
|
38
|
+
/* Calls msim's sloppy_freq function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_sloppy_freq(msim, distance) ((msim)->sloppy_freq((msim), (distance)))
|
39
|
+
/* Calls msim's idf_term function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_idf_term(msim, term, searcher) ((msim)->idf_term((msim), (term), (searcher)))
|
40
|
+
/* Calls msim's idf_phrase function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_idf_phrase(msim, terms, tcnt, searcher) ((msim)->idf_phrase((msim), (terms), (tcnt), (searcher)))
|
41
|
+
/* Calls msim's idf function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_idf(msim, doc_freq, num_docs) ((msim)->idf((msim), (doc_freq), (num_docs)))
|
42
|
+
/* Calls msim's coord function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_coord(msim, overlap, max_overlap) ((msim)->coord((msim), (overlap), (max_overlap)))
|
43
|
+
/* Calls msim's decode_norm function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_decode_norm(msim, b) ((msim)->decode_norm((msim), (b)))
|
44
|
+
/* Calls msim's encode_norm function pointer with msim as self. Arguments are
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_encode_norm(msim, f) ((msim)->encode_norm((msim), (f)))
|
45
|
+
/* Calls msim's destroy function pointer, passing msim itself. The argument is
 * parenthesized so any expression (e.g. *pp) works; msim is evaluated twice. */
#define sim_destroy(msim) ((msim)->destroy(msim))
|
46
|
+
|
47
|
+
float byte_to_float(uchar b);
|
48
|
+
uchar float_to_byte(float f);
|
49
|
+
|
50
|
+
Similarity *sim_create_default();
|
51
|
+
|
52
|
+
/***************************************************************************
|
53
|
+
*
|
54
|
+
* Explanation
|
55
|
+
*
|
56
|
+
***************************************************************************/
|
57
|
+
|
58
|
+
#define EXPLANATION_DETAILS_START_SIZE 4
|
59
|
+
typedef struct Explanation {
|
60
|
+
float value;
|
61
|
+
char *description;
|
62
|
+
struct Explanation **details;
|
63
|
+
int dcnt;
|
64
|
+
int dcapa;
|
65
|
+
} Explanation;
|
66
|
+
|
67
|
+
Explanation *expl_create(float value, char *description);
|
68
|
+
void expl_destoy(void *p);
|
69
|
+
Explanation *expl_add_detail(Explanation *self, Explanation *detail);
|
70
|
+
char *expl_to_s(Explanation *self, int depth);
|
71
|
+
char *expl_to_html(Explanation *self);
|
72
|
+
|
73
|
+
/***************************************************************************
|
74
|
+
*
|
75
|
+
* Hit
|
76
|
+
*
|
77
|
+
***************************************************************************/
|
78
|
+
|
79
|
+
typedef struct Hit {
|
80
|
+
int doc;
|
81
|
+
float score;
|
82
|
+
} Hit;
|
83
|
+
|
84
|
+
bool hit_less_than(void *p1, void *p2);
|
85
|
+
|
86
|
+
/***************************************************************************
|
87
|
+
*
|
88
|
+
* TopDocs
|
89
|
+
*
|
90
|
+
***************************************************************************/
|
91
|
+
|
92
|
+
typedef struct TopDocs {
|
93
|
+
int total_hits;
|
94
|
+
int size;
|
95
|
+
Hit **hits;
|
96
|
+
} TopDocs;
|
97
|
+
|
98
|
+
TopDocs *td_create(int total_hits, int size, Hit **hits);
|
99
|
+
void td_destroy(void *p);
|
100
|
+
char *td_to_s(TopDocs *td);
|
101
|
+
|
102
|
+
/***************************************************************************
|
103
|
+
*
|
104
|
+
* Filter
|
105
|
+
*
|
106
|
+
***************************************************************************/
|
107
|
+
|
108
|
+
typedef struct Filter {
|
109
|
+
void *data;
|
110
|
+
char *name;
|
111
|
+
HshTable *cache;
|
112
|
+
BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
|
113
|
+
char *(*to_s)(struct Filter *self);
|
114
|
+
void (*destroy)(void *p);
|
115
|
+
} Filter;
|
116
|
+
|
117
|
+
Filter *filt_create(char *name);
|
118
|
+
char *filt_to_s(Filter *self);
|
119
|
+
BitVector *filt_get_bv(Filter *self, IndexReader *ir);
|
120
|
+
void filt_destroy(void *p);
|
121
|
+
|
122
|
+
/***************************************************************************
|
123
|
+
*
|
124
|
+
* RangeFilter
|
125
|
+
*
|
126
|
+
***************************************************************************/
|
127
|
+
|
128
|
+
Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
129
|
+
bool include_lower, bool include_upper);
|
130
|
+
void rfilt_destroy(void *p);
|
131
|
+
|
132
|
+
/***************************************************************************
|
133
|
+
*
|
134
|
+
* QueryFilter
|
135
|
+
*
|
136
|
+
***************************************************************************/
|
137
|
+
|
138
|
+
typedef struct QueryFilter {
|
139
|
+
Query *query;
|
140
|
+
} QueryFilter;
|
141
|
+
|
142
|
+
Filter *qfilt_create(Query *query);
|
143
|
+
|
144
|
+
|
145
|
+
/***************************************************************************
|
146
|
+
*
|
147
|
+
* Weight
|
148
|
+
*
|
149
|
+
***************************************************************************/
|
150
|
+
|
151
|
+
struct Weight {
|
152
|
+
void *data;
|
153
|
+
float value;
|
154
|
+
float qweight;
|
155
|
+
float qnorm;
|
156
|
+
float idf;
|
157
|
+
Query *query;
|
158
|
+
Similarity *similarity;
|
159
|
+
Query *(*get_query)(Weight *self);
|
160
|
+
float (*get_value)(Weight *self);
|
161
|
+
void (*normalize)(Weight *self, float normalization_factor);
|
162
|
+
Scorer *(*scorer)(Weight *self, IndexReader *ir);
|
163
|
+
Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
|
164
|
+
float (*sum_of_squared_weights)(Weight *self);
|
165
|
+
char *(*to_s)(Weight *self);
|
166
|
+
void (*destroy)(void *p);
|
167
|
+
};
|
168
|
+
|
169
|
+
Query *w_get_query(Weight *self);
|
170
|
+
float w_get_value(Weight *self);
|
171
|
+
float w_sum_of_squared_weights(Weight *self);
|
172
|
+
void w_normalize(Weight *self, float normalization_factor);
|
173
|
+
|
174
|
+
/***************************************************************************
|
175
|
+
*
|
176
|
+
* TermWeight
|
177
|
+
*
|
178
|
+
***************************************************************************/
|
179
|
+
|
180
|
+
Weight *tw_create(Query *query, Searcher *searcher);
|
181
|
+
|
182
|
+
/***************************************************************************
|
183
|
+
*
|
184
|
+
* BooleanWeight
|
185
|
+
*
|
186
|
+
***************************************************************************/
|
187
|
+
|
188
|
+
typedef struct BooleanWeight {
|
189
|
+
Weight **weights;
|
190
|
+
int w_cnt;
|
191
|
+
} BooleanWeight;
|
192
|
+
Weight *bw_create(Query *query, Searcher *searcher);
|
193
|
+
|
194
|
+
/***************************************************************************
|
195
|
+
*
|
196
|
+
* PhraseWeight
|
197
|
+
*
|
198
|
+
***************************************************************************/
|
199
|
+
|
200
|
+
Weight *phw_create(Query *query, Searcher *searcher);
|
201
|
+
|
202
|
+
/***************************************************************************
|
203
|
+
*
|
204
|
+
* ConstantScoreWeight
|
205
|
+
*
|
206
|
+
***************************************************************************/
|
207
|
+
|
208
|
+
Weight *csw_create(Query *query, Searcher *searcher);
|
209
|
+
|
210
|
+
/***************************************************************************
|
211
|
+
*
|
212
|
+
* MatchAllWeight
|
213
|
+
*
|
214
|
+
***************************************************************************/
|
215
|
+
|
216
|
+
Weight *maw_create(Query *query, Searcher *searcher);
|
217
|
+
|
218
|
+
/***************************************************************************
|
219
|
+
*
|
220
|
+
* SpanWeight
|
221
|
+
*
|
222
|
+
***************************************************************************/
|
223
|
+
|
224
|
+
Weight *spanw_create(Query *query, Searcher *searcher);
|
225
|
+
|
226
|
+
/***************************************************************************
|
227
|
+
*
|
228
|
+
* Query
|
229
|
+
*
|
230
|
+
***************************************************************************/
|
231
|
+
|
232
|
+
enum QUERY_TYPE {
|
233
|
+
TERM_QUERY,
|
234
|
+
BOOLEAN_QUERY,
|
235
|
+
PHRASE_QUERY,
|
236
|
+
MULTI_PHRASE_QUERY,
|
237
|
+
CONSTANT_QUERY,
|
238
|
+
MATCH_ALL_QUERY,
|
239
|
+
RANGE_QUERY,
|
240
|
+
WILD_CARD_QUERY,
|
241
|
+
FUZZY_QUERY,
|
242
|
+
PREFIX_QUERY,
|
243
|
+
SPAN_TERM_QUERY,
|
244
|
+
SPAN_FIRST_QUERY,
|
245
|
+
SPAN_OR_QUERY,
|
246
|
+
SPAN_NOT_QUERY,
|
247
|
+
SPAN_NEAR_QUERY
|
248
|
+
};
|
249
|
+
|
250
|
+
struct Query {
|
251
|
+
bool destroy_all : 1;
|
252
|
+
uchar type;
|
253
|
+
void *data;
|
254
|
+
float boost;
|
255
|
+
float original_boost;
|
256
|
+
Weight *weight;
|
257
|
+
Query *rewritten;
|
258
|
+
Weight *(*create_weight)(Query *self, Searcher *searcher);
|
259
|
+
Query *(*rewrite)(Query *self, IndexReader *ir);
|
260
|
+
void (*extract_terms)(Query *self, Array *terms);
|
261
|
+
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
262
|
+
char *(*to_s)(Query *self, char *field);
|
263
|
+
void (*destroy)(void *p);
|
264
|
+
};
|
265
|
+
|
266
|
+
Weight *q_weight(Query *self, Searcher *searcher);
|
267
|
+
void q_destroy(Query *self);
|
268
|
+
Similarity *q_get_similarity(Query *self, Searcher *searcher);
|
269
|
+
void q_extract_terms(Query *self, Array *terms);
|
270
|
+
Query *q_create();
|
271
|
+
|
272
|
+
/***************************************************************************
|
273
|
+
*
|
274
|
+
* TermQuery
|
275
|
+
*
|
276
|
+
***************************************************************************/
|
277
|
+
|
278
|
+
typedef struct TermQuery {
|
279
|
+
Term *term;
|
280
|
+
} TermQuery;
|
281
|
+
|
282
|
+
Query *tq_create(Term *term);
|
283
|
+
|
284
|
+
/***************************************************************************
|
285
|
+
*
|
286
|
+
* BooleanQuery
|
287
|
+
*
|
288
|
+
***************************************************************************/
|
289
|
+
|
290
|
+
/***************************************************************************
|
291
|
+
* BooleanClause
|
292
|
+
***************************************************************************/
|
293
|
+
|
294
|
+
enum BC_TYPE {
|
295
|
+
BC_SHOULD,
|
296
|
+
BC_MUST,
|
297
|
+
BC_MUST_NOT
|
298
|
+
};
|
299
|
+
|
300
|
+
typedef struct BooleanClause {
|
301
|
+
Query *query;
|
302
|
+
Query *rewritten;
|
303
|
+
unsigned int occur : 4;
|
304
|
+
bool is_prohibited : 1;
|
305
|
+
bool is_required : 1;
|
306
|
+
} BooleanClause;
|
307
|
+
|
308
|
+
BooleanClause *bc_create(Query *query, unsigned int occur);
|
309
|
+
void bc_set_occur(BooleanClause *self, unsigned int occur);
|
310
|
+
|
311
|
+
/***************************************************************************
|
312
|
+
* BooleanQuery
|
313
|
+
***************************************************************************/
|
314
|
+
|
315
|
+
#define DEFAULT_MAX_CLAUSE_COUNT 1024
|
316
|
+
#define BOOLEAN_CLAUSES_START_CAPA 4
|
317
|
+
#define QUERY_STRING_START_SIZE 64
|
318
|
+
|
319
|
+
typedef struct BooleanQuery {
|
320
|
+
bool coord_disabled;
|
321
|
+
int max_clause_cnt;
|
322
|
+
int clause_cnt;
|
323
|
+
int clause_capa;
|
324
|
+
BooleanClause **clauses;
|
325
|
+
Similarity *similarity;
|
326
|
+
} BooleanQuery;
|
327
|
+
|
328
|
+
Query *bq_create(bool coord_disabled);
|
329
|
+
void bq_add_query(Query *self, Query *sub_query, unsigned int occur);
|
330
|
+
|
331
|
+
/***************************************************************************
|
332
|
+
*
|
333
|
+
* PhraseQuery
|
334
|
+
*
|
335
|
+
***************************************************************************/
|
336
|
+
|
337
|
+
#define PHQ_INIT_CAPA 4
|
338
|
+
typedef struct PhraseQuery {
|
339
|
+
int slop;
|
340
|
+
Term **terms;
|
341
|
+
int *positions;
|
342
|
+
int t_cnt;
|
343
|
+
int t_capa;
|
344
|
+
char *field;
|
345
|
+
} PhraseQuery;
|
346
|
+
|
347
|
+
Query *phq_create();
|
348
|
+
void phq_add_term(Query *self, Term *term, int pos_inc);
|
349
|
+
|
350
|
+
/***************************************************************************
|
351
|
+
*
|
352
|
+
* MultiPhraseQuery
|
353
|
+
*
|
354
|
+
***************************************************************************/
|
355
|
+
|
356
|
+
typedef struct MultiPhraseQuery {
|
357
|
+
int slop;
|
358
|
+
Term ***terms;
|
359
|
+
int *positions;
|
360
|
+
int *pt_cnt;
|
361
|
+
int t_cnt;
|
362
|
+
int t_capa;
|
363
|
+
char *field;
|
364
|
+
} MultiPhraseQuery;
|
365
|
+
|
366
|
+
Query *mphq_create();
|
367
|
+
void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
|
368
|
+
|
369
|
+
/***************************************************************************
|
370
|
+
*
|
371
|
+
* PrefixQuery
|
372
|
+
*
|
373
|
+
***************************************************************************/
|
374
|
+
|
375
|
+
Query *prefixq_create(Term *prefix);
|
376
|
+
|
377
|
+
/***************************************************************************
|
378
|
+
*
|
379
|
+
* WildCardQuery
|
380
|
+
*
|
381
|
+
***************************************************************************/
|
382
|
+
|
383
|
+
#define WILD_CHAR '?'
|
384
|
+
#define WILD_STRING '*'
|
385
|
+
Query *wcq_create(Term *term);
|
386
|
+
bool wc_match(char *pattern, char *text);
|
387
|
+
|
388
|
+
/***************************************************************************
|
389
|
+
*
|
390
|
+
* FuzzyQuery
|
391
|
+
*
|
392
|
+
***************************************************************************/
|
393
|
+
|
394
|
+
#define DEF_MIN_SIM 0.5
|
395
|
+
#define DEF_PRE_LEN 0
|
396
|
+
#define TYPICAL_LONGEST_WORD 20
|
397
|
+
|
398
|
+
typedef struct FuzzyQuery {
|
399
|
+
Term *term;
|
400
|
+
char *text; /* term text after prefix */
|
401
|
+
int text_len;
|
402
|
+
int pre_len;
|
403
|
+
float min_sim;
|
404
|
+
float scale_factor;
|
405
|
+
int max_distances[TYPICAL_LONGEST_WORD];
|
406
|
+
int *da;
|
407
|
+
int da_capa;
|
408
|
+
} FuzzyQuery;
|
409
|
+
|
410
|
+
Query *fuzq_create(Term *term);
|
411
|
+
Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
|
412
|
+
|
413
|
+
/***************************************************************************
|
414
|
+
*
|
415
|
+
* ConstantScoreQuery
|
416
|
+
*
|
417
|
+
***************************************************************************/
|
418
|
+
|
419
|
+
Query *csq_create(Filter *filter);
|
420
|
+
|
421
|
+
/***************************************************************************
|
422
|
+
*
|
423
|
+
* MatchAllQuery
|
424
|
+
*
|
425
|
+
***************************************************************************/
|
426
|
+
|
427
|
+
Query *maq_create();
|
428
|
+
|
429
|
+
/***************************************************************************
|
430
|
+
*
|
431
|
+
* MatchAllQuery
|
432
|
+
*
|
433
|
+
***************************************************************************/
|
434
|
+
|
435
|
+
Query *maq_create();
|
436
|
+
|
437
|
+
/***************************************************************************
|
438
|
+
*
|
439
|
+
* RangeQuery
|
440
|
+
*
|
441
|
+
***************************************************************************/
|
442
|
+
|
443
|
+
typedef struct Range {
|
444
|
+
char *field;
|
445
|
+
char *lower_term;
|
446
|
+
char *upper_term;
|
447
|
+
bool include_lower;
|
448
|
+
bool include_upper;
|
449
|
+
} Range;
|
450
|
+
|
451
|
+
Query *rq_create(const char *field, char *lower_term, char *upper_term,
|
452
|
+
bool include_lower, bool include_upper);
|
453
|
+
Query *rq_create_less(const char *field, char *upper_term, bool include_upper);
|
454
|
+
Query *rq_create_more(const char *field, char *lower_term, bool include_lower);
|
455
|
+
|
456
|
+
/***************************************************************************
|
457
|
+
*
|
458
|
+
* SpanQuery
|
459
|
+
*
|
460
|
+
***************************************************************************/
|
461
|
+
|
462
|
+
/***************************************************************************
|
463
|
+
* SpanEnum
|
464
|
+
***************************************************************************/
|
465
|
+
|
466
|
+
typedef struct SpanEnum SpanEnum;
|
467
|
+
struct SpanEnum {
|
468
|
+
void *data;
|
469
|
+
Query *query;
|
470
|
+
bool (*next)(SpanEnum *self);
|
471
|
+
bool (*skip_to)(SpanEnum *self, int target_doc);
|
472
|
+
int (*doc)(SpanEnum *self);
|
473
|
+
int (*start)(SpanEnum *self);
|
474
|
+
int (*end)(SpanEnum *self);
|
475
|
+
char *(*to_s)(SpanEnum *self);
|
476
|
+
void (*destroy)(void *p);
|
477
|
+
};
|
478
|
+
|
479
|
+
/***************************************************************************
|
480
|
+
* SpanTermEnum
|
481
|
+
***************************************************************************/
|
482
|
+
|
483
|
+
typedef struct SpanTermEnum SpanTermEnum;
|
484
|
+
struct SpanTermEnum {
|
485
|
+
TermDocEnum *positions;
|
486
|
+
int position;
|
487
|
+
int doc;
|
488
|
+
int count;
|
489
|
+
int freq;
|
490
|
+
};
|
491
|
+
|
492
|
+
SpanEnum *spante_create(Query *query, IndexReader *ir);
|
493
|
+
|
494
|
+
/***************************************************************************
|
495
|
+
* SpanFirstEnum
|
496
|
+
***************************************************************************/
|
497
|
+
|
498
|
+
SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
499
|
+
|
500
|
+
/***************************************************************************
|
501
|
+
* SpanOrEnum
|
502
|
+
***************************************************************************/
|
503
|
+
|
504
|
+
typedef struct SpanOrEnum {
|
505
|
+
PriorityQueue *queue;
|
506
|
+
SpanEnum **span_enums;
|
507
|
+
int s_cnt;
|
508
|
+
bool first_time;
|
509
|
+
} SpanOrEnum;
|
510
|
+
SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
511
|
+
|
512
|
+
/***************************************************************************
|
513
|
+
* SpanEnumCell
|
514
|
+
***************************************************************************/
|
515
|
+
|
516
|
+
typedef struct SpanEnumCell {
|
517
|
+
SpanEnum *parent;
|
518
|
+
SpanEnum *se;
|
519
|
+
int index;
|
520
|
+
int length;
|
521
|
+
} SpanEnumCell;
|
522
|
+
SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
523
|
+
|
524
|
+
/***************************************************************************
|
525
|
+
* SpanNearEnum
|
526
|
+
***************************************************************************/
|
527
|
+
|
528
|
+
typedef struct SpanNearEnum {
|
529
|
+
SpanEnum **span_enums;
|
530
|
+
int s_cnt;
|
531
|
+
int slop;
|
532
|
+
int current;
|
533
|
+
bool first_time : 1;
|
534
|
+
bool in_order : 1;
|
535
|
+
int doc;
|
536
|
+
int start;
|
537
|
+
int end;
|
538
|
+
} SpanNearEnum;
|
539
|
+
|
540
|
+
SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
541
|
+
|
542
|
+
/***************************************************************************
|
543
|
+
* SpanNotEnum
|
544
|
+
***************************************************************************/
|
545
|
+
|
546
|
+
typedef struct SpanNotEnum {
|
547
|
+
SpanEnum *inc;
|
548
|
+
SpanEnum *exc;
|
549
|
+
bool more_inc : 1;
|
550
|
+
bool more_exc : 1;
|
551
|
+
} SpanNotEnum;
|
552
|
+
|
553
|
+
SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
554
|
+
|
555
|
+
/***************************************************************************
|
556
|
+
* SpanQuery
|
557
|
+
***************************************************************************/
|
558
|
+
|
559
|
+
typedef struct SpanQuery SpanQuery;
|
560
|
+
struct SpanQuery {
|
561
|
+
void *data;
|
562
|
+
char *field;
|
563
|
+
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
564
|
+
Array *(*get_terms)(Query *self);
|
565
|
+
};
|
566
|
+
|
567
|
+
/***************************************************************************
|
568
|
+
* SpanTermQuery
|
569
|
+
***************************************************************************/
|
570
|
+
|
571
|
+
Query *spantq_create(Term *term);
|
572
|
+
|
573
|
+
/***************************************************************************
|
574
|
+
* SpanFirstQuery
|
575
|
+
***************************************************************************/
|
576
|
+
|
577
|
+
typedef struct SpanFirstQuery {
|
578
|
+
int end;
|
579
|
+
Query *match;
|
580
|
+
} SpanFirstQuery;
|
581
|
+
|
582
|
+
Query *spanfq_create(Query *match, int end);
|
583
|
+
|
584
|
+
/***************************************************************************
|
585
|
+
* SpanOrQuery
|
586
|
+
***************************************************************************/
|
587
|
+
|
588
|
+
typedef struct SpanOrQuery {
|
589
|
+
Query **clauses;
|
590
|
+
int c_cnt;
|
591
|
+
} SpanOrQuery;
|
592
|
+
|
593
|
+
Query *spanoq_create(Query **clauses, int c_cnt);
|
594
|
+
|
595
|
+
/***************************************************************************
|
596
|
+
* SpanNearQuery
|
597
|
+
***************************************************************************/
|
598
|
+
|
599
|
+
typedef struct SpanNearQuery {
|
600
|
+
Query **clauses;
|
601
|
+
int c_cnt;
|
602
|
+
int slop;
|
603
|
+
bool in_order;
|
604
|
+
} SpanNearQuery;
|
605
|
+
|
606
|
+
Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order);
|
607
|
+
|
608
|
+
|
609
|
+
/***************************************************************************
|
610
|
+
* SpanNotQuery
|
611
|
+
***************************************************************************/
|
612
|
+
|
613
|
+
typedef struct SpanNotQuery {
|
614
|
+
Query *inc;
|
615
|
+
Query *exc;
|
616
|
+
} SpanNotQuery;
|
617
|
+
|
618
|
+
Query *spanxq_create(Query *inc, Query *exc);
|
619
|
+
|
620
|
+
/***************************************************************************
|
621
|
+
*
|
622
|
+
* Scorer
|
623
|
+
*
|
624
|
+
***************************************************************************/
|
625
|
+
|
626
|
+
/* Calls mscorer's destroy function pointer on it, then sets mscorer to NULL.
 * Wrapped in do { } while (0) so both steps stay together under an unbraced
 * if/else; mscorer must be an lvalue and is evaluated more than once. */
#define SCORER_NULLIFY(mscorer) do { (mscorer)->destroy(mscorer); (mscorer) = NULL; } while (0)
|
627
|
+
|
628
|
+
struct Scorer {
|
629
|
+
void *data;
|
630
|
+
Similarity *similarity;
|
631
|
+
int doc;
|
632
|
+
float (*score)(Scorer *self);
|
633
|
+
bool (*next)(Scorer *self);
|
634
|
+
bool (*skip_to)(Scorer *self, int doc_num);
|
635
|
+
Explanation *(*explain)(Scorer *self, int doc_num);
|
636
|
+
void (*destroy)(void *p);
|
637
|
+
};
|
638
|
+
|
639
|
+
void scorer_destroy(void *p);
|
640
|
+
Scorer *scorer_create(Similarity *similarity);
|
641
|
+
bool scorer_less_than(void *p1, void *p2);
|
642
|
+
bool scorer_doc_less_than(void *p1, void *p2);
|
643
|
+
int scorer_doc_cmp(const void *p1, const void *p2);
|
644
|
+
|
645
|
+
/***************************************************************************
|
646
|
+
*
|
647
|
+
* TermScorer
|
648
|
+
*
|
649
|
+
***************************************************************************/
|
650
|
+
|
651
|
+
#define SCORE_CACHE_SIZE 32
|
652
|
+
#define TDE_READ_SIZE 32
|
653
|
+
|
654
|
+
typedef struct TermScorer {
|
655
|
+
int docs[TDE_READ_SIZE];
|
656
|
+
int freqs[TDE_READ_SIZE];
|
657
|
+
int pointer;
|
658
|
+
int pointer_max;
|
659
|
+
float score_cache[SCORE_CACHE_SIZE];
|
660
|
+
Weight *weight;
|
661
|
+
TermDocEnum *tde;
|
662
|
+
uchar *norms;
|
663
|
+
float weight_value;
|
664
|
+
} TermScorer;
|
665
|
+
|
666
|
+
Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
667
|
+
|
668
|
+
/***************************************************************************
|
669
|
+
*
|
670
|
+
* BooleanScorer
|
671
|
+
*
|
672
|
+
***************************************************************************/
|
673
|
+
|
674
|
+
/***************************************************************************
|
675
|
+
* Coordinator
|
676
|
+
***************************************************************************/
|
677
|
+
|
678
|
+
typedef struct Coordinator {
|
679
|
+
int max_coord;
|
680
|
+
float *coord_factors;
|
681
|
+
Similarity *similarity;
|
682
|
+
int num_matches;
|
683
|
+
} Coordinator;
|
684
|
+
|
685
|
+
Coordinator *coo_create(Similarity *similarity);
|
686
|
+
Coordinator *coo_init(Coordinator *self);
|
687
|
+
|
688
|
+
/***************************************************************************
|
689
|
+
* DisjunctionSumScorer
|
690
|
+
***************************************************************************/
|
691
|
+
|
692
|
+
typedef struct DisjunctionSumScorer{
|
693
|
+
float cum_score;
|
694
|
+
int num_matches;
|
695
|
+
int min_num_matches;
|
696
|
+
Scorer **sub_scorers;
|
697
|
+
int ss_cnt;
|
698
|
+
PriorityQueue *scorer_queue;
|
699
|
+
Coordinator *coordinator;
|
700
|
+
} DisjunctionSumScorer;
|
701
|
+
|
702
|
+
/***************************************************************************
|
703
|
+
* ConjunctionScorer
|
704
|
+
***************************************************************************/
|
705
|
+
|
706
|
+
/* State record for the ConjunctionScorer section: two 1-bit flags, an
 * array of Scorer pointers with count/capacity fields, two int positions
 * (first/last), a Coordinator pointer and the last scored doc number.
 * NOTE(review): first_time/more are 1-bit bit-fields of type bool; if bool
 * is a signed integer typedef in this project a set bit may read back as
 * -1 -- verify how bool is defined. */
typedef struct ConjunctionScorer{
|
707
|
+
bool first_time : 1;
|
708
|
+
bool more : 1;
|
709
|
+
int coord;
|
710
|
+
int ss_cnt; /* presumably entries used in sub_scorers */
|
711
|
+
int ss_capa; /* presumably entries allocated in sub_scorers */
|
712
|
+
Scorer **sub_scorers;
|
713
|
+
int first; /* presumably an index into sub_scorers -- TODO confirm */
|
714
|
+
int last; /* presumably an index into sub_scorers -- TODO confirm */
|
715
|
+
Coordinator *coordinator;
|
716
|
+
int last_scored_doc;
|
717
|
+
} ConjunctionScorer;
|
718
|
+
|
719
|
+
/***************************************************************************
|
720
|
+
* SingleMatchScorer
|
721
|
+
***************************************************************************/
|
722
|
+
|
723
|
+
/* State record for the SingleMatchScorer section: pairs one Scorer with a
 * Coordinator pointer. Presumably wraps `scorer` so its matches are
 * reported to `coordinator` -- TODO confirm. */
typedef struct SingleMatchScorer {
|
724
|
+
Coordinator *coordinator;
|
725
|
+
Scorer *scorer;
|
726
|
+
} SingleMatchScorer;
|
727
|
+
|
728
|
+
/***************************************************************************
|
729
|
+
* ReqOptSumScorer
|
730
|
+
***************************************************************************/
|
731
|
+
|
732
|
+
/* State record for the ReqOptSumScorer section: two Scorer pointers
 * (req_scorer, opt_scorer) and a flag. Presumably "required" and
 * "optional" scorers whose scores are summed -- TODO confirm. */
typedef struct ReqOptSumScorer {
|
733
|
+
Scorer *req_scorer;
|
734
|
+
Scorer *opt_scorer;
|
735
|
+
bool first_time_opt; /* presumably true until opt_scorer is first advanced */
|
736
|
+
} ReqOptSumScorer;
|
737
|
+
|
738
|
+
/***************************************************************************
|
739
|
+
* ReqExclScorer
|
740
|
+
***************************************************************************/
|
741
|
+
|
742
|
+
/* State record for the ReqExclScorer section: two Scorer pointers
 * (req_scorer, excl_scorer) and a flag. Presumably "required" and
 * "excluded" scorers -- TODO confirm. */
typedef struct ReqExclScorer {
|
743
|
+
Scorer *req_scorer;
|
744
|
+
Scorer *excl_scorer;
|
745
|
+
bool first_time; /* presumably true until the first advance */
|
746
|
+
} ReqExclScorer;
|
747
|
+
|
748
|
+
/***************************************************************************
|
749
|
+
* BooleanScorer
|
750
|
+
***************************************************************************/
|
751
|
+
|
752
|
+
/* State record for the BooleanScorer section. Holds three Scorer-pointer
 * arrays (required, optional, prohibited), each followed by an int *_cnt
 * and *_capa field (presumably used/allocated entry counts), plus one
 * further Scorer pointer and a Coordinator pointer. */
typedef struct BooleanScorer {
|
753
|
+
Scorer **required_scorers;
|
754
|
+
int rs_cnt;
|
755
|
+
int rs_capa;
|
756
|
+
Scorer **optional_scorers;
|
757
|
+
int os_cnt;
|
758
|
+
int os_capa;
|
759
|
+
Scorer **prohibited_scorers;
|
760
|
+
int ps_cnt;
|
761
|
+
int ps_capa;
|
762
|
+
Scorer *counting_sum_scorer; /* presumably built from the three lists -- TODO confirm */
|
763
|
+
Coordinator *coordinator;
|
764
|
+
} BooleanScorer;
|
765
|
+
|
766
|
+
/* BooleanScorer entry points. bsc_create() takes a Similarity and returns
 * a Scorer *. bsc_add_scorer() takes the boolean scorer (`self`), another
 * Scorer and an unsigned `occur` value; presumably `occur` selects the
 * required/optional/prohibited list -- its values are not defined in the
 * visible part of this header, TODO confirm. */
Scorer *bsc_create(Similarity *similarity);
|
767
|
+
void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
768
|
+
|
769
|
+
/***************************************************************************
|
770
|
+
*
|
771
|
+
* PhraseScorer
|
772
|
+
*
|
773
|
+
***************************************************************************/
|
774
|
+
|
775
|
+
/***************************************************************************
|
776
|
+
* PhrasePosition
|
777
|
+
***************************************************************************/
|
778
|
+
/* PhrasePosition: a TermDocEnum pointer plus four ints. `tpe` and `offset`
 * have the same names and types as the parameters of pp_create(); the
 * remaining fields are presumably iteration state -- TODO confirm. */
typedef struct PhrasePosition {
|
779
|
+
TermDocEnum *tpe;
|
780
|
+
int offset;
|
781
|
+
int count;
|
782
|
+
int doc;
|
783
|
+
int position;
|
784
|
+
} PhrasePosition;
|
785
|
+
|
786
|
+
/* Declares pp_create(): takes a TermDocEnum and an int offset (matching the
 * `tpe` and `offset` members of PhrasePosition) and returns a
 * PhrasePosition *. */
PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
787
|
+
/***************************************************************************
|
788
|
+
* PhraseScorer
|
789
|
+
***************************************************************************/
|
790
|
+
|
791
|
+
/* State record for the PhraseScorer section: score inputs (freq, norms,
 * value, weight), two 1-bit flags, an array of PhrasePosition pointers
 * with first/last/count ints, a phrase_freq callback and a slop value.
 * NOTE(review): first_time/more are 1-bit bit-fields of type bool; verify
 * that bool is not a signed integer typedef (a set bit could read as -1). */
typedef struct PhraseScorer {
|
792
|
+
float freq;
|
793
|
+
uchar *norms;
|
794
|
+
float value;
|
795
|
+
Weight *weight;
|
796
|
+
bool first_time : 1;
|
797
|
+
bool more : 1;
|
798
|
+
int pp_first; /* presumably an index into phrase_pos -- TODO confirm */
|
799
|
+
int pp_last; /* presumably an index into phrase_pos -- TODO confirm */
|
800
|
+
int pp_cnt; /* presumably the number of entries in phrase_pos */
|
801
|
+
PhrasePosition **phrase_pos;
|
802
|
+
float (*phrase_freq)(Scorer *self); /* callback: takes the Scorer, returns a float */
|
803
|
+
int slop;
|
804
|
+
} PhraseScorer;
|
805
|
+
|
806
|
+
/* Declares phsc_create(): returns a Scorer * given a Weight, an array of
 * TermDocEnum pointers, an int array of positions, a count `t_cnt`, a
 * Similarity and a norms byte array. Presumably both arrays hold t_cnt
 * entries -- TODO confirm. */
Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
807
|
+
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
808
|
+
|
809
|
+
/***************************************************************************
|
810
|
+
* ExactPhraseScorer
|
811
|
+
***************************************************************************/
|
812
|
+
|
813
|
+
/* Declares exact_phrase_scorer_create(): same parameter list and return
 * type as phsc_create(). Presumably yields a phrase scorer that matches
 * terms at exact relative positions -- TODO confirm. */
Scorer *exact_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
|
814
|
+
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
815
|
+
|
816
|
+
/***************************************************************************
|
817
|
+
* SloppyPhraseScorer
|
818
|
+
***************************************************************************/
|
819
|
+
|
820
|
+
/* Declares sloppy_phrase_scorer_create(): like exact_phrase_scorer_create()
 * but with an additional int `slop` before `norms` (PhraseScorer has a
 * member of the same name). Meaning of slop is not defined in this
 * header -- TODO confirm. */
Scorer *sloppy_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
|
821
|
+
int *positions, int t_cnt, Similarity *similarity, int slop, uchar *norms);
|
822
|
+
|
823
|
+
/***************************************************************************
|
824
|
+
*
|
825
|
+
* ConstantScoreScorer
|
826
|
+
*
|
827
|
+
***************************************************************************/
|
828
|
+
|
829
|
+
/* State record for the ConstantScoreScorer section: a BitVector pointer
 * and a float score. Presumably `bv` marks matching documents and every
 * match receives `score` -- TODO confirm. */
typedef struct ConstantScoreScorer {
|
830
|
+
BitVector *bv;
|
831
|
+
float score;
|
832
|
+
} ConstantScoreScorer;
|
833
|
+
|
834
|
+
/* Declares cssc_create(): takes a Weight and an IndexReader and returns a
 * Scorer * (presumably backed by ConstantScoreScorer -- TODO confirm). */
Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
835
|
+
|
836
|
+
|
837
|
+
/***************************************************************************
|
838
|
+
*
|
839
|
+
* MatchAllScorer
|
840
|
+
*
|
841
|
+
***************************************************************************/
|
842
|
+
|
843
|
+
/* State record for the MatchAllScorer section: an IndexReader pointer, an
 * int max_doc and a float score. Presumably max_doc bounds the document
 * numbers iterated -- TODO confirm. */
typedef struct MatchAllScorer {
|
844
|
+
IndexReader *ir;
|
845
|
+
int max_doc;
|
846
|
+
float score;
|
847
|
+
} MatchAllScorer;
|
848
|
+
|
849
|
+
/* Declares masc_create(): takes a Weight and an IndexReader and returns a
 * Scorer * (presumably backed by MatchAllScorer -- TODO confirm). */
Scorer *masc_create(Weight *weight, IndexReader *ir);
|
850
|
+
|
851
|
+
|
852
|
+
/***************************************************************************
|
853
|
+
*
|
854
|
+
* SpanScorer
|
855
|
+
*
|
856
|
+
***************************************************************************/
|
857
|
+
|
858
|
+
/* State record for the SpanScorer section: two 1-bit flags, pointers to an
 * IndexReader, a SpanEnum, a Similarity, a norms byte array and a Weight,
 * plus two floats (value, freq).
 * NOTE(review): first_time/more are 1-bit bit-fields of type bool; verify
 * that bool is not a signed integer typedef (a set bit could read as -1). */
typedef struct SpanScorer {
|
859
|
+
bool first_time : 1;
|
860
|
+
bool more : 1;
|
861
|
+
IndexReader *ir;
|
862
|
+
SpanEnum *spans;
|
863
|
+
Similarity *sim;
|
864
|
+
uchar *norms;
|
865
|
+
Weight *weight;
|
866
|
+
float value;
|
867
|
+
float freq;
|
868
|
+
} SpanScorer;
|
869
|
+
|
870
|
+
/* Declares spansc_create(): takes a Weight and an IndexReader and returns a
 * Scorer * (presumably backed by SpanScorer -- TODO confirm). */
Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
871
|
+
|
872
|
+
/***************************************************************************
|
873
|
+
*
|
874
|
+
* Sort
|
875
|
+
*
|
876
|
+
***************************************************************************/
|
877
|
+
|
878
|
+
/* Sort kinds. No explicit initializers are given, so the constants take
 * the values 0 (SORT_TYPE_SCORE) through 5 (SORT_TYPE_AUTO) in order.
 * SortField.type is declared as int and presumably stores one of these. */
enum SORT_TYPE {
|
879
|
+
SORT_TYPE_SCORE,
|
880
|
+
SORT_TYPE_DOC,
|
881
|
+
SORT_TYPE_INTEGER,
|
882
|
+
SORT_TYPE_FLOAT,
|
883
|
+
SORT_TYPE_STRING,
|
884
|
+
SORT_TYPE_AUTO
|
885
|
+
};
|
886
|
+
|
887
|
+
/***************************************************************************
|
888
|
+
* SortField
|
889
|
+
***************************************************************************/
|
890
|
+
|
891
|
+
/* SortField: describes one sort criterion. Carries a mutex, a field name,
 * an int type, a 1-bit reverse flag, an opaque index pointer and four
 * callbacks that operate on that index. What the mutex guards is not
 * stated here -- TODO confirm.
 * NOTE(review): `reverse` is a 1-bit bit-field of type bool; verify that
 * bool is not a signed integer typedef. */
typedef struct SortField {
|
892
|
+
mutex_t mutex;
|
893
|
+
char *field;
|
894
|
+
int type; /* presumably an enum SORT_TYPE value */
|
895
|
+
bool reverse : 1;
|
896
|
+
void *index; /* opaque; passed to compare/handle_term as a void * */
|
897
|
+
int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2); /* orders two hits using the index */
|
898
|
+
void *(*create_index)(int size); /* returns a new opaque index for `size` (units not stated) */
|
899
|
+
void (*destroy_index)(void *p);
|
900
|
+
void (*handle_term)(void *index, TermDocEnum *tde, char *text); /* presumably fills the index per term */
|
901
|
+
} SortField;
|
902
|
+
|
903
|
+
/* SortField constructors and destructor. The generic sort_field_create()
 * takes a field name, an int type and a reverse flag. The score/doc
 * variants take only `reverse`; the int/float/string/auto variants take a
 * field name and `reverse`. sort_field_destroy() takes a void * (presumably
 * so it can serve as a generic free callback -- TODO confirm). Whether the
 * field string is copied is not stated here. */
SortField *sort_field_create(char *field, int type, bool reverse);
|
904
|
+
SortField *sort_field_score_create(bool reverse);
|
905
|
+
SortField *sort_field_doc_create(bool reverse);
|
906
|
+
SortField *sort_field_int_create(char *field, bool reverse);
|
907
|
+
SortField *sort_field_float_create(char *field, bool reverse);
|
908
|
+
SortField *sort_field_string_create(char *field, bool reverse);
|
909
|
+
SortField *sort_field_auto_create(char *field, bool reverse);
|
910
|
+
void sort_field_destroy(void *p);
|
911
|
+
|
912
|
+
/* Four SortField objects declared extern here and therefore defined in
 * another translation unit. Presumably shared, ready-made score/doc sort
 * fields in normal and reversed order -- TODO confirm. */
extern SortField SORT_FIELD_SCORE;
|
913
|
+
extern SortField SORT_FIELD_SCORE_REV;
|
914
|
+
extern SortField SORT_FIELD_DOC;
|
915
|
+
extern SortField SORT_FIELD_DOC_REV;
|
916
|
+
|
917
|
+
/***************************************************************************
|
918
|
+
* Sort
|
919
|
+
***************************************************************************/
|
920
|
+
|
921
|
+
/* Sort: an array of SortField pointers with int count/capacity fields and
 * a 1-bit destroy_all flag. Presumably destroy_all tells sort_destroy()
 * whether to destroy the contained SortFields too -- TODO confirm.
 * NOTE(review): destroy_all is a 1-bit bit-field of type bool; verify that
 * bool is not a signed integer typedef. */
typedef struct Sort {
|
922
|
+
SortField **sort_fields;
|
923
|
+
int sf_cnt; /* presumably entries used in sort_fields */
|
924
|
+
int sf_capa; /* presumably entries allocated in sort_fields */
|
925
|
+
bool destroy_all : 1;
|
926
|
+
} Sort;
|
927
|
+
|
928
|
+
/* Declares sort_create(): takes no arguments and returns a Sort *.
 * The parameter list is spelled (void) so that this is a real prototype:
 * before C23, empty parentheses leave the parameters unspecified and the
 * compiler would not diagnose a call that passes arguments. */
Sort *sort_create(void);
|
929
|
+
/* Sort maintenance entry points. sort_destroy() takes a void * (presumably
 * a Sort *, typed generically for use as a free callback -- TODO confirm).
 * sort_add_sort_field() takes the Sort and a SortField pointer;
 * sort_clear() takes only the Sort. Ownership of `sf` after adding is not
 * stated in this header. */
void sort_destroy(void *p);
|
930
|
+
void sort_add_sort_field(Sort *self, SortField *sf);
|
931
|
+
void sort_clear(Sort *self);
|
932
|
+
|
933
|
+
/***************************************************************************
|
934
|
+
* FieldSortedHitQueue
|
935
|
+
***************************************************************************/
|
936
|
+
|
937
|
+
/* FieldSortedHitQueue operations on a PriorityQueue. pop returns a Hit *;
 * push takes the element as a void * (presumably a Hit * -- TODO confirm);
 * down and destroy take only the queue (destroy as a void *).
 * fshq_pq_create() builds the queue from a size, a Sort and an
 * IndexReader. */
Hit *fshq_pq_pop(PriorityQueue *pq);
|
938
|
+
void fshq_pq_down(PriorityQueue *pq);
|
939
|
+
void fshq_pq_push(PriorityQueue *pq, void *elem);
|
940
|
+
void fshq_pq_destroy(void *p);
|
941
|
+
PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
942
|
+
|
943
|
+
/***************************************************************************
|
944
|
+
*
|
945
|
+
* Searcher
|
946
|
+
*
|
947
|
+
***************************************************************************/
|
948
|
+
|
949
|
+
/* struct Searcher: an IndexReader pointer, a Similarity pointer and a table
 * of function pointers, each taking the Searcher itself as first argument.
 * The members refer to the type name `Searcher`, so a typedef for this
 * struct must be declared elsewhere (not visible in this part of the
 * header). */
struct Searcher {
|
950
|
+
IndexReader *ir;
|
951
|
+
Similarity *similarity;
|
952
|
+
int (*doc_freq)(Searcher *self, Term *term);
|
953
|
+
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt); /* returns an int array; who frees it is not stated */
|
954
|
+
Document *(*get_doc)(Searcher *self, int doc_num);
|
955
|
+
int (*max_doc)(Searcher *self);
|
956
|
+
Weight *(*create_weight)(Searcher *self, Query *query);
|
957
|
+
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
958
|
+
int num_docs, Filter *filter, Sort *sort);
|
959
|
+
Query *(*rewrite)(Searcher *self, Query *original);
|
960
|
+
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
961
|
+
Similarity *(*get_similarity)(Searcher *self);
|
962
|
+
void (*close)(Searcher *self);
|
963
|
+
};
|
964
|
+
|
965
|
+
/* Searcher functions. sea_create() takes an IndexReader and returns a
 * Searcher *. sea_search, sea_explain, sea_get_similarity, sea_rewrite,
 * sea_close, sea_get_doc, sea_create_weight and sea_doc_freq have the same
 * signatures as the like-named function-pointer members of struct Searcher
 * (presumably they are what sea_create() installs -- TODO confirm).
 * sea_search_each() additionally takes a callback `fn` receiving the
 * Searcher, a doc number and the caller's `arg`. */
Searcher *sea_create(IndexReader *ir);
|
966
|
+
TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
|
967
|
+
int num_docs, Filter *filter, Sort *sort);
|
968
|
+
void sea_search_each(Searcher *self, Query *query, Filter *filter,
|
969
|
+
void (*fn)(Searcher *self, int doc_num, void *arg), void *arg);
|
970
|
+
Explanation *sea_explain(Searcher *self, Query *query, int doc_num);
|
971
|
+
Similarity *sea_get_similarity(Searcher *self);
|
972
|
+
Query *sea_rewrite(Searcher *self, Query *original);
|
973
|
+
void sea_close(Searcher *self);
|
974
|
+
Document *sea_get_doc(Searcher *self, int doc_num);
|
975
|
+
Weight *sea_create_weight(Searcher *self, Query *query);
|
976
|
+
int sea_doc_freq(Searcher *self, Term *term);
|
977
|
+
|
978
|
+
/***************************************************************************
|
979
|
+
*
|
980
|
+
* QParser
|
981
|
+
*
|
982
|
+
***************************************************************************/
|
983
|
+
|
984
|
+
/* Number of rows in QParser.buf, which is declared as
 * char buf[CONC_WORDS][MAX_WORD_SIZE]. */
#define CONC_WORDS 2
|
985
|
+
|
986
|
+
/* QParser: query-parser state. Carries a mutex, six 1-bit option flags, a
 * default slop, two char pointers for the query string, a small word
 * buffer with an index, four HashSet pointers, an Analyzer and the
 * resulting Query. Field notes are inferred from names -- TODO confirm.
 * NOTE(review): the flags are 1-bit bit-fields of type bool; verify that
 * bool is not a signed integer typedef (a set bit could read as -1). */
typedef struct QParser {
|
987
|
+
mutex_t mutex;
|
988
|
+
bool or_default : 1;
|
989
|
+
bool wild_lower : 1;
|
990
|
+
bool clean_str : 1;
|
991
|
+
bool handle_parse_errors : 1;
|
992
|
+
bool allow_any_fields : 1;
|
993
|
+
bool close_def_fields : 1;
|
994
|
+
int def_slop;
|
995
|
+
char *qstr; /* presumably the query string being parsed */
|
996
|
+
char *qstrp; /* presumably the current position within qstr */
|
997
|
+
char buf[CONC_WORDS][MAX_WORD_SIZE]; /* CONC_WORDS rows of MAX_WORD_SIZE chars */
|
998
|
+
int buf_index; /* presumably selects the row of buf in use */
|
999
|
+
HashSet *fields;
|
1000
|
+
HashSet *fields_buf;
|
1001
|
+
HashSet *def_fields; /* same name/type as the qp_create() parameter */
|
1002
|
+
HashSet *all_fields; /* same name/type as the qp_create() parameter */
|
1003
|
+
Analyzer *analyzer;
|
1004
|
+
Query *result;
|
1005
|
+
} QParser;
|
1006
|
+
|
1007
|
+
/* QParser entry points. qp_create() takes two HashSets (all_fields,
 * def_fields) and an Analyzer and returns a QParser *. qp_destroy() takes
 * a void * (presumably a QParser * -- TODO confirm). qp_parse() takes a
 * query string and returns a Query *. qp_clean_str() takes and returns a
 * char *; whether the result is newly allocated is not stated here. */
QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer);
|
1008
|
+
void qp_destroy(void *p);
|
1009
|
+
Query *qp_parse(QParser *self, char *qstr);
|
1010
|
+
char *qp_clean_str(char *str);
|
1011
|
+
|
1012
|
+
/***************************************************************************
|
1013
|
+
*
|
1014
|
+
* Index
|
1015
|
+
*
|
1016
|
+
***************************************************************************/
|
1017
|
+
|
1018
|
+
/* Index: aggregate handle bundling a mutex, a Store, an Analyzer, an
 * IndexReader, an IndexWriter, a Searcher, a QParser, a HashSet key, two
 * field-name strings and five 1-bit flags. Field notes are inferred from
 * names -- TODO confirm.
 * NOTE(review): the flags are 1-bit bit-fields of type bool; verify that
 * bool is not a signed integer typedef (a set bit could read as -1). */
typedef struct Index {
|
1019
|
+
mutex_t mutex;
|
1020
|
+
Store *store;
|
1021
|
+
Analyzer *analyzer;
|
1022
|
+
IndexReader *ir;
|
1023
|
+
IndexWriter *iw;
|
1024
|
+
Searcher *sea;
|
1025
|
+
QParser *qp;
|
1026
|
+
HashSet *key;
|
1027
|
+
char *id_field;
|
1028
|
+
char *def_field;
|
1029
|
+
bool close_analyzer : 1; /* presumably: destroy analyzer with the Index */
|
1030
|
+
bool close_store : 1; /* presumably: close store with the Index */
|
1031
|
+
/* for IndexWriter */
|
1032
|
+
bool use_compound_file : 1;
|
1033
|
+
bool auto_flush : 1;
|
1034
|
+
bool has_writes : 1;
|
1035
|
+
} Index;
|
1036
|
+
|
1037
|
+
/* Index API. index_create() takes a Store, an Analyzer, a HashSet of
 * default fields and a `create` flag and returns an Index *; every other
 * function takes that Index as its first argument. Documents can be added
 * as a Document, a string or an Array; looked up by doc number, id string
 * or Term; and deleted by doc number, Term, id string, Query or query
 * string. Locking behaviour and ownership of arguments/returned objects
 * are not stated in this header -- TODO confirm (index_get_doc_ts is
 * presumably a thread-safe variant of index_get_doc). */
Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
1038
|
+
bool create);
|
1039
|
+
void index_destroy(Index *self);
|
1040
|
+
void index_flush(Index *self);
|
1041
|
+
int index_size(Index *self);
|
1042
|
+
void index_optimize(Index *self);
|
1043
|
+
bool index_has_del(Index *self);
|
1044
|
+
bool index_is_deleted(Index *self, int doc_num);
|
1045
|
+
void index_add_doc(Index *self, Document *doc);
|
1046
|
+
void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
1047
|
+
void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1048
|
+
void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1049
|
+
TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1050
|
+
int num_docs, Filter *filter, Sort *sort);
|
1051
|
+
Query *index_get_query(Index *self, char *qstr);
|
1052
|
+
Document *index_get_doc(Index *self, int doc_num);
|
1053
|
+
Document *index_get_doc_ts(Index *self, int doc_num);
|
1054
|
+
Document *index_get_doc_id(Index *self, char *id);
|
1055
|
+
Document *index_get_doc_term(Index *self, Term *term);
|
1056
|
+
void index_delete(Index *self, int doc_num);
|
1057
|
+
void index_delete_term(Index *self, Term *term);
|
1058
|
+
void index_delete_id(Index *self, char *id);
|
1059
|
+
void index_delete_query(Index *self, Query *q, Filter *f);
|
1060
|
+
void index_delete_query_str(Index *self, char *qstr, Filter *f);
|
1061
|
+
int index_term_id(Index *self, Term *term);
|
1062
|
+
Explanation *index_explain(Index *self, Query *q, int doc_num);
|
1063
|
+
void index_auto_flush_ir(Index *self);
|
1064
|
+
void index_auto_flush_iw(Index *self);
|
1065
|
+
#endif
|