sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,885 @@
1
+ #ifndef FRT_SEARCH_H
2
+ #define FRT_SEARCH_H
3
+
4
+ typedef struct Query Query;
5
+ typedef struct Weight Weight;
6
+ typedef struct Scorer Scorer;
7
+
8
+ #include "index.h"
9
+ #include "bitvector.h"
10
+ #include "similarity.h"
11
+
12
+ /***************************************************************************
13
+ *
14
+ * Explanation
15
+ *
16
+ ***************************************************************************/
17
+
18
+ #define EXPLANATION_DETAILS_START_SIZE 4
19
+ typedef struct Explanation /* One explanation node: a float value, a description string, and `details`, a pointer to pointers to further Explanation objects. */
20
+ {
21
+ float value;
22
+ char *description;
23
+ struct Explanation **details; /* presumably a growable array filled by expl_add_detail() -- TODO confirm in the implementation file */
24
+ } Explanation;
25
+
26
+ extern Explanation *expl_new(float value, const char *description, ...);
27
+ extern void expl_destroy(Explanation *expl);
28
+ extern Explanation *expl_add_detail(Explanation *expl, Explanation *detail);
29
+ extern char *expl_to_s_depth(Explanation *expl, int depth);
30
+ extern char *expl_to_html(Explanation *expl);
31
+
32
+ #define expl_to_s(expl) expl_to_s_depth(expl, 0) /* shorthand: expl_to_s_depth() called with depth 0 */
33
+
34
+ /***************************************************************************
35
+ *
36
+ * Highlighter
37
+ *
38
+ ***************************************************************************/
39
+
40
+ typedef struct MatchRange
41
+ {
42
+ int start;
43
+ int end;
44
+ int start_offset;
45
+ int end_offset;
46
+ double score;
47
+ } MatchRange;
48
+
49
+ #define MATCH_VECTOR_INIT_CAPA 8
50
+ typedef struct MatchVector
51
+ {
52
+ int size;
53
+ int capa;
54
+ MatchRange *matches;
55
+ } MatchVector;
56
+
57
+ extern MatchVector *matchv_new(void); /* takes no arguments; (void) makes this a real prototype so stray arguments are diagnosed */
58
+ extern MatchVector *matchv_add(MatchVector *mp, int start, int end);
59
+ extern MatchVector *matchv_sort(MatchVector *self);
60
+ extern void matchv_destroy(MatchVector *self);
61
+ extern MatchVector *matchv_compact(MatchVector *self);
62
+ extern MatchVector *matchv_compact_with_breaks(MatchVector *self);
63
+
64
+ /***************************************************************************
65
+ *
66
+ * Hit
67
+ *
68
+ ***************************************************************************/
69
+
70
+ typedef struct Hit
71
+ {
72
+ int doc;
73
+ float score;
74
+ } Hit;
75
+
76
+ /***************************************************************************
77
+ *
78
+ * TopDocs
79
+ *
80
+ ***************************************************************************/
81
+
82
+ typedef struct TopDocs
83
+ {
84
+ int total_hits;
85
+ int size;
86
+ Hit **hits;
87
+ float max_score;
88
+ } TopDocs;
89
+
90
+ extern TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score);
91
+ extern void td_destroy(TopDocs *td);
92
+ extern char *td_to_s(TopDocs *td);
93
+
94
+ /***************************************************************************
95
+ *
96
+ * Filter
97
+ *
98
+ ***************************************************************************/
99
+
100
+ typedef struct Filter /* Filter object: a name, a HashTable cache, a table of function pointers that all take the Filter itself as first argument, and a ref_cnt counter. */
101
+ {
102
+ char *name;
103
+ HashTable *cache;
104
+ BitVector *(*get_bv_i)(struct Filter *self, IndexReader *ir); /* returns a BitVector for the given IndexReader */
105
+ char *(*to_s)(struct Filter *self);
106
+ unsigned long (*hash)(struct Filter *self);
107
+ int (*eq)(struct Filter *self, struct Filter *o);
108
+ void (*destroy_i)(struct Filter *self);
109
+ int ref_cnt; /* presumably decremented by filt_deref() -- TODO confirm */
110
+ } Filter;
111
+
112
+ #define filt_new(type) filt_create(sizeof(type), #type) /* passes sizeof(type) and the stringified type name to filt_create() */
113
+ extern Filter *filt_create(size_t size, const char *name);
114
+ extern BitVector *filt_get_bv(Filter *filt, IndexReader *ir);
115
+ extern void filt_destroy_i(Filter *filt);
116
+ extern void filt_deref(Filter *filt);
117
+ extern unsigned long filt_hash(Filter *filt);
118
+ extern int filt_eq(Filter *filt, Filter *o);
119
+
120
+ /***************************************************************************
121
+ *
122
+ * RangeFilter
123
+ *
124
+ ***************************************************************************/
125
+
126
+ extern Filter *rfilt_new(const char *field,
127
+ const char *lower_term, const char *upper_term,
128
+ bool include_lower, bool include_upper);
129
+
130
+ /***************************************************************************
131
+ *
132
+ * QueryFilter
133
+ *
134
+ ***************************************************************************/
135
+
136
+ extern Filter *qfilt_new(Query *query);
137
+ extern Filter *qfilt_new_nr(Query *query);
138
+
139
+ /***************************************************************************
140
+ *
141
+ * Weight
142
+ *
143
+ ***************************************************************************/
144
+
145
+ struct Weight /* Body of the `Weight` type forward-declared at the top of this header: four float fields, the Query and Similarity it refers to, and a table of function pointers taking the Weight as first argument. */
146
+ {
147
+ float value;
148
+ float qweight;
149
+ float qnorm;
150
+ float idf;
151
+ Query *query;
152
+ Similarity *similarity;
153
+ Query *(*get_query)(Weight *self);
154
+ float (*get_value)(Weight *self);
155
+ void (*normalize)(Weight *self, float normalization_factor);
156
+ Scorer *(*scorer)(Weight *self, IndexReader *ir); /* produces a Scorer for the given IndexReader */
157
+ Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
158
+ float (*sum_of_squared_weights)(Weight *self);
159
+ char *(*to_s)(Weight *self);
160
+ void (*destroy)(Weight *self);
161
+ };
162
+
163
+ #define w_new(type, query) w_create(sizeof(type), query)
164
+ extern Weight *w_create(size_t size, Query *query);
165
+ extern void w_destroy(Weight *self);
166
+ extern Query *w_get_query(Weight *self);
167
+ extern float w_get_value(Weight *self);
168
+ extern float w_sum_of_squared_weights(Weight *self);
169
+ extern void w_normalize(Weight *self, float normalization_factor);
170
+
171
+ /***************************************************************************
172
+ *
173
+ * Query
174
+ *
175
+ ***************************************************************************/
176
+
177
+ enum QUERY_TYPE
178
+ {
179
+ TERM_QUERY,
180
+ MULTI_TERM_QUERY,
181
+ BOOLEAN_QUERY,
182
+ PHRASE_QUERY,
183
+ CONSTANT_QUERY,
184
+ FILTERED_QUERY,
185
+ MATCH_ALL_QUERY,
186
+ RANGE_QUERY,
187
+ WILD_CARD_QUERY,
188
+ FUZZY_QUERY,
189
+ PREFIX_QUERY,
190
+ SPAN_TERM_QUERY,
191
+ SPAN_MULTI_TERM_QUERY,
192
+ SPAN_PREFIX_QUERY,
193
+ SPAN_FIRST_QUERY,
194
+ SPAN_OR_QUERY,
195
+ SPAN_NOT_QUERY,
196
+ SPAN_NEAR_QUERY
197
+ };
198
+
199
+ struct Query /* Common query header; the concrete query structs in this file (TermQuery, BooleanQuery, PhraseQuery, ...) embed it as their first member `super`. */
200
+ {
201
+ int ref_cnt; /* presumably decremented by q_deref() -- TODO confirm */
202
+ float boost;
203
+ Weight *weight;
204
+ Query *(*rewrite)(Query *self, IndexReader *ir);
205
+ void (*extract_terms)(Query *self, HashSet *terms);
206
+ Similarity *(*get_similarity)(Query *self, Searcher *searcher); /* NOTE(review): Searcher is not declared in the visible part of this header; presumably it comes from an included header -- confirm */
207
+ char *(*to_s)(Query *self, const char *field);
208
+ unsigned long (*hash)(Query *self);
209
+ int (*eq)(Query *self, Query *o);
210
+ void (*destroy_i)(Query *self);
211
+ Weight *(*create_weight_i)(Query *self, Searcher *searcher);
212
+ MatchVector *(*get_matchv_i)(Query *self, MatchVector *mv, TermVector *tv);
213
+ enum QUERY_TYPE type; /* one of the QUERY_TYPE enumerators declared above */
214
+ };
215
+
216
+ /* Internal Query Functions */
217
+ extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
218
+ extern void q_destroy_i(Query *self);
219
+ extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
220
+
221
+ extern void q_deref(Query *self);
222
+ extern const char *q_get_query_name(enum QUERY_TYPE type);
223
+ extern Weight *q_weight(Query *self, Searcher *searcher);
224
+ extern Query *q_combine(Query **queries, int q_cnt);
225
+ extern unsigned long q_hash(Query *self);
226
+ extern int q_eq(Query *self, Query *o);
227
+ extern Query *q_create(size_t size);
228
+ #define q_new(type) q_create(sizeof(type))
229
+
230
+ /***************************************************************************
231
+ * TermQuery
232
+ ***************************************************************************/
233
+
234
+ typedef struct TermQuery /* Query specialisation holding a field string and a term string. */
235
+ {
236
+ Query super; /* first member, so a TermQuery * can be converted to a Query * */
237
+ char *field;
238
+ char *term;
239
+ } TermQuery;
240
+
241
+ Query *tq_new(const char *field, const char *term);
242
+
243
+ /***************************************************************************
244
+ * BooleanQuery
245
+ ***************************************************************************/
246
+
247
+ /* *** BooleanClause *** */
248
+
249
+ enum BC_TYPE
250
+ {
251
+ BC_SHOULD,
252
+ BC_MUST,
253
+ BC_MUST_NOT
254
+ };
255
+
256
+ typedef struct BooleanClause /* A Query pointer plus occurrence information, with its own ref_cnt counter. */
257
+ {
258
+ int ref_cnt; /* presumably decremented by bc_deref() -- TODO confirm */
259
+ Query *query;
260
+ unsigned int occur : 4; /* 4-bit field; presumably holds an enum BC_TYPE value (its enumerators are 0..2, which fit) -- confirm */
261
+ bool is_prohibited : 1; /* single-bit flag */
262
+ bool is_required : 1; /* single-bit flag */
263
+ } BooleanClause;
264
+
265
+ extern BooleanClause *bc_new(Query *query, enum BC_TYPE occur);
266
+ extern void bc_deref(BooleanClause *self);
267
+ extern void bc_set_occur(BooleanClause *self, enum BC_TYPE occur);
268
+
269
+ /* *** BooleanQuery *** */
270
+
271
+ #define DEFAULT_MAX_CLAUSE_COUNT 1024
272
+ #define BOOLEAN_CLAUSES_START_CAPA 4
273
+ #define QUERY_STRING_START_SIZE 64
274
+
275
+ typedef struct BooleanQuery
276
+ {
277
+ Query super;
278
+ bool coord_disabled;
279
+ int max_clause_cnt;
280
+ int clause_cnt;
281
+ int clause_capa;
282
+ float original_boost;
283
+ BooleanClause **clauses;
284
+ Similarity *similarity;
285
+ } BooleanQuery;
286
+
287
+ extern Query *bq_new(bool coord_disabled);
288
+ extern Query *bq_new_max(bool coord_disabled, int max);
289
+ extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
290
+ enum BC_TYPE occur);
291
+ extern BooleanClause *bq_add_query_nr(Query *self, Query *sub_query,
292
+ enum BC_TYPE occur);
293
+ extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
294
+ extern BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc);
295
+
296
+ /***************************************************************************
297
+ * PhraseQuery
298
+ ***************************************************************************/
299
+
300
+ #define PHQ_INIT_CAPA 4
301
+ typedef struct PhraseQuery
302
+ {
303
+ Query super;
304
+ int slop;
305
+ char *field;
306
+ PhrasePosition *positions;
307
+ int pos_cnt;
308
+ int pos_capa;
309
+ } PhraseQuery;
310
+
311
+ extern Query *phq_new(const char *field);
312
+ extern void phq_add_term(Query *self, const char *term, int pos_inc);
313
+ extern void phq_add_term_abs(Query *self, const char *term, int position);
314
+ extern void phq_append_multi_term(Query *self, const char *term);
315
+
316
+ /***************************************************************************
317
+ * MultiTermQuery
318
+ ***************************************************************************/
319
+
320
+ #define MULTI_TERM_QUERY_MAX_TERMS 256
321
+ typedef struct MultiTermQuery
322
+ {
323
+ Query super;
324
+ char *field;
325
+ PriorityQueue *boosted_terms;
326
+ float min_boost;
327
+ } MultiTermQuery;
328
+
329
+ extern void multi_tq_add_term(Query *self, const char *term);
330
+ extern void multi_tq_add_term_boost(Query *self, const char *term, float boost);
331
+ extern Query *multi_tq_new(const char *field);
332
+ extern Query *multi_tq_new_conf(const char *field, int max_terms,
333
+ float min_boost);
334
+
335
+ #define MTQMaxTerms(query) (((MTQSubQuery *)(query))->max_terms) /* casts `query` to MTQSubQuery * and yields its max_terms member (usable as an lvalue) */
336
+ typedef struct MTQSubQuery /* Query header extended with max_terms; PrefixQuery, WildCardQuery and FuzzyQuery in this file embed it as their first member. */
337
+ {
338
+ Query super; /* first member, so an MTQSubQuery * can be converted to a Query * */
339
+ int max_terms;
340
+ } MTQSubQuery;
341
+
342
+ /***************************************************************************
343
+ * PrefixQuery
344
+ ***************************************************************************/
345
+
346
+ #define PREFIX_QUERY_MAX_TERMS 256
347
+
348
+ typedef struct PrefixQuery
349
+ {
350
+ MTQSubQuery super;
351
+ char *field;
352
+ char *prefix;
353
+ } PrefixQuery;
354
+
355
+ extern Query *prefixq_new(const char *field, const char *prefix);
356
+
357
+ /***************************************************************************
358
+ * WildCardQuery
359
+ ***************************************************************************/
360
+
361
+ #define WILD_CHAR '?'
362
+ #define WILD_STRING '*'
363
+ #define WILD_CARD_QUERY_MAX_TERMS 256
364
+
365
+ typedef struct WildCardQuery
366
+ {
367
+ MTQSubQuery super;
368
+ char *field;
369
+ char *pattern;
370
+ } WildCardQuery;
371
+
372
+
373
+ extern Query *wcq_new(const char *field, const char *pattern);
374
+ extern bool wc_match(const char *pattern, const char *text);
375
+
376
+ /***************************************************************************
377
+ * FuzzyQuery
378
+ ***************************************************************************/
379
+
380
+ #define DEF_MIN_SIM 0.5f
381
+ #define DEF_PRE_LEN 0
382
+ #define DEF_MAX_TERMS 256
383
+ #define TYPICAL_LONGEST_WORD 20
384
+
385
+ typedef struct FuzzyQuery
386
+ {
387
+ MTQSubQuery super;
388
+ char *field;
389
+ char *term;
390
+ const char *text; /* term text after prefix */
391
+ int text_len;
392
+ int pre_len;
393
+ float min_sim;
394
+ float scale_factor;
395
+ int max_distances[TYPICAL_LONGEST_WORD];
396
+ int *da;
397
+ } FuzzyQuery;
398
+
399
+ extern Query *fuzq_new(const char *term, const char *field);
400
+ extern Query *fuzq_new_conf(const char *field, const char *term,
401
+ float min_sim, int pre_len, int max_terms);
402
+
403
+ /***************************************************************************
404
+ * ConstantScoreQuery
405
+ ***************************************************************************/
406
+
407
+ typedef struct ConstantScoreQuery
408
+ {
409
+ Query super;
410
+ Filter *filter;
411
+ Query *original;
412
+ } ConstantScoreQuery;
413
+
414
+ extern Query *csq_new(Filter *filter);
415
+ extern Query *csq_new_nr(Filter *filter);
416
+
417
+ /***************************************************************************
418
+ * FilteredQuery
419
+ ***************************************************************************/
420
+
421
+ typedef struct FilteredQuery
422
+ {
423
+ Query super;
424
+ Query *query;
425
+ Filter *filter;
426
+ } FilteredQuery;
427
+
428
+ extern Query *fq_new(Query *query, Filter *filter);
429
+
430
+ /***************************************************************************
431
+ * MatchAllQuery
432
+ ***************************************************************************/
433
+
434
+ extern Query *maq_new(void); /* takes no arguments; (void) makes this a real prototype so stray arguments are diagnosed */
435
+
436
+ /***************************************************************************
437
+ * RangeQuery
438
+ ***************************************************************************/
439
+
440
+ extern Query *rq_new(const char *field, const char *lower_term,
441
+ const char *upper_term, bool include_lower,
442
+ bool include_upper);
443
+ extern Query *rq_new_less(const char *field, const char *upper_term,
444
+ bool include_upper);
445
+ extern Query *rq_new_more(const char *field, const char *lower_term,
446
+ bool include_lower);
447
+
448
+ /***************************************************************************
449
+ * SpanQuery
450
+ ***************************************************************************/
451
+
452
+ /* ** SpanEnum ** */
453
+ typedef struct SpanEnum SpanEnum; /* typedef first so the function-pointer members below can name SpanEnum */
454
+ struct SpanEnum /* A Query pointer plus a table of function pointers that all take the SpanEnum as first argument. */
455
+ {
456
+ Query *query;
457
+ bool (*next)(SpanEnum *self);
458
+ bool (*skip_to)(SpanEnum *self, int target_doc);
459
+ int (*doc)(SpanEnum *self);
460
+ int (*start)(SpanEnum *self);
461
+ int (*end)(SpanEnum *self);
462
+ char *(*to_s)(SpanEnum *self);
463
+ void (*destroy)(SpanEnum *self);
464
+ };
465
+
466
+ /* ** SpanQuery ** */
467
+ typedef struct SpanQuery
468
+ {
469
+ Query super;
470
+ char *field;
471
+ SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
472
+ HashSet *(*get_terms)(Query *self);
473
+ } SpanQuery;
474
+
475
+ /***************************************************************************
476
+ * SpanTermQuery
477
+ ***************************************************************************/
478
+
479
+ typedef struct SpanTermQuery
480
+ {
481
+ SpanQuery super;
482
+ char *term;
483
+ } SpanTermQuery;
484
+ extern Query *spantq_new(const char *field, const char *term);
485
+
486
+ /***************************************************************************
487
+ * SpanMultiTermQuery
488
+ ***************************************************************************/
489
+
490
+ #define SPAN_MULTI_TERM_QUERY_CAPA 1024
491
+ typedef struct SpanMultiTermQuery
492
+ {
493
+ SpanQuery super;
494
+ char **terms;
495
+ int term_cnt;
496
+ int term_capa;
497
+ } SpanMultiTermQuery;
498
+
499
+ extern Query *spanmtq_new(const char *field);
500
+ extern Query *spanmtq_new_conf(const char *field, int max_size);
501
+ extern void spanmtq_add_term(Query *self, const char *term);
502
+
503
+ /***************************************************************************
504
+ * SpanFirstQuery
505
+ ***************************************************************************/
506
+
507
+ typedef struct SpanFirstQuery
508
+ {
509
+ SpanQuery super;
510
+ int end;
511
+ Query *match;
512
+ } SpanFirstQuery;
513
+
514
+ extern Query *spanfq_new(Query *match, int end);
515
+ extern Query *spanfq_new_nr(Query *match, int end);
516
+
517
+ /***************************************************************************
518
+ * SpanOrQuery
519
+ ***************************************************************************/
520
+
521
+ typedef struct SpanOrQuery
522
+ {
523
+ SpanQuery super;
524
+ Query **clauses;
525
+ int c_cnt;
526
+ int c_capa;
527
+ } SpanOrQuery;
528
+
529
+ extern Query *spanoq_new(void); /* takes no arguments; (void) makes this a real prototype so stray arguments are diagnosed */
530
+ extern Query *spanoq_add_clause(Query *self, Query *clause);
531
+ extern Query *spanoq_add_clause_nr(Query *self, Query *clause);
532
+
533
+ /***************************************************************************
534
+ * SpanNearQuery
535
+ ***************************************************************************/
536
+
537
+ typedef struct SpanNearQuery
538
+ {
539
+ SpanQuery super;
540
+ Query **clauses;
541
+ int c_cnt;
542
+ int c_capa;
543
+ int slop;
544
+ bool in_order : 1;
545
+ } SpanNearQuery;
546
+
547
+ extern Query *spannq_new(int slop, bool in_order);
548
+ extern Query *spannq_add_clause(Query *self, Query *clause);
549
+ extern Query *spannq_add_clause_nr(Query *self, Query *clause);
550
+
551
+ /***************************************************************************
552
+ * SpanNotQuery
553
+ ***************************************************************************/
554
+
555
+ typedef struct SpanNotQuery
556
+ {
557
+ SpanQuery super;
558
+ Query *inc;
559
+ Query *exc;
560
+ } SpanNotQuery;
561
+
562
+ extern Query *spanxq_new(Query *inc, Query *exc);
563
+ extern Query *spanxq_new_nr(Query *inc, Query *exc);
564
+
565
+
566
+ /***************************************************************************
567
+ * SpanPrefixQuery
568
+ ***************************************************************************/
569
+
570
+ #define SPAN_PREFIX_QUERY_MAX_TERMS 256
571
+
572
+ typedef struct SpanPrefixQuery
573
+ {
574
+ SpanQuery super;
575
+ char *prefix;
576
+ int max_terms;
577
+ } SpanPrefixQuery;
578
+
579
+ extern Query *spanprq_new(const char *field, const char *prefix);
580
+
581
+
582
+ /***************************************************************************
583
+ *
584
+ * Scorer
585
+ *
586
+ ***************************************************************************/
587
+
588
+ #define SCORER_NULLIFY(mscorer) do {/* calls the scorer's own destroy callback on it, then sets the variable to NULL */\
589
+ (mscorer)->destroy(mscorer);\
590
+ (mscorer) = NULL;\
591
+ } while (0) /* the argument is expanded three times and assigned to, so pass a side-effect-free lvalue; there is no NULL check before the destroy call */
592
+
593
+ struct Scorer /* Body of the `Scorer` type forward-declared at the top of this header: a Similarity pointer, an int doc field, and function pointers taking the Scorer as first argument. */
594
+ {
595
+ Similarity *similarity;
596
+ int doc;
597
+ float (*score)(Scorer *self);
598
+ bool (*next)(Scorer *self);
599
+ bool (*skip_to)(Scorer *self, int doc_num);
600
+ Explanation *(*explain)(Scorer *self, int doc_num);
601
+ void (*destroy)(Scorer *self); /* this is the callback that SCORER_NULLIFY invokes */
602
+ };
603
+
604
+ #define scorer_new(type, similarity) scorer_create(sizeof(type), similarity)
605
+ /* Internal Scorer Function */
606
+ extern void scorer_destroy_i(Scorer *self);
607
+ extern Scorer *scorer_create(size_t size, Similarity *similarity);
608
+ extern bool scorer_less_than(void *p1, void *p2);
609
+ extern bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2);
610
+ extern int scorer_doc_cmp(const void *p1, const void *p2);
611
+
612
+ /***************************************************************************
613
+ *
614
+ * Sort
615
+ *
616
+ ***************************************************************************/
617
+
618
+ enum SORT_TYPE
619
+ {
620
+ SORT_TYPE_SCORE,
621
+ SORT_TYPE_DOC,
622
+ SORT_TYPE_BYTE,
623
+ SORT_TYPE_INTEGER,
624
+ SORT_TYPE_FLOAT,
625
+ SORT_TYPE_STRING,
626
+ SORT_TYPE_AUTO
627
+ };
628
+
629
+ /***************************************************************************
630
+ * Comparable
631
+ ***************************************************************************/
632
+
633
+ typedef struct Comparable /* Tagged value: an int `type`, a union holding an int, float, string or generic pointer, and a one-bit reverse flag. */
634
+ {
635
+ int type; /* presumably an enum SORT_TYPE value selecting the active union member -- TODO confirm */
636
+ union {
637
+ int i;
638
+ float f;
639
+ char *s;
640
+ void *p;
641
+ } val;
642
+ bool reverse : 1; /* single-bit flag */
643
+ } Comparable;
644
+
645
+ /***************************************************************************
646
+ * SortField
647
+ ***************************************************************************/
648
+
649
+ typedef struct SortField
650
+ {
651
+ mutex_t mutex;
652
+ char *field;
653
+ enum SORT_TYPE type;
654
+ bool reverse : 1;
655
+ void *index;
656
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
657
+ void (*get_val)(void *index_ptr, Hit *hit1, Comparable *comparable);
658
+ void *(*create_index)(int size);
659
+ void (*destroy_index)(void *p);
660
+ void (*handle_term)(void *index, TermDocEnum *tde, char *text);
661
+ } SortField;
662
+
663
+ extern SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse);
664
+ extern SortField *sort_field_score_new(bool reverse);
665
+ extern SortField *sort_field_doc_new(bool reverse);
666
+ extern SortField *sort_field_int_new(char *field, bool reverse);
667
+ extern SortField *sort_field_byte_new(char *field, bool reverse);
668
+ extern SortField *sort_field_float_new(char *field, bool reverse);
669
+ extern SortField *sort_field_string_new(char *field, bool reverse);
670
+ extern SortField *sort_field_auto_new(char *field, bool reverse);
671
+ extern void sort_field_destroy(void *p);
672
+ extern char *sort_field_to_s(SortField *self);
673
+
674
+ extern const SortField SORT_FIELD_SCORE;
675
+ extern const SortField SORT_FIELD_SCORE_REV;
676
+ extern const SortField SORT_FIELD_DOC;
677
+ extern const SortField SORT_FIELD_DOC_REV;
678
+
679
+ /***************************************************************************
680
+ * Sort
681
+ ***************************************************************************/
682
+
683
+ typedef struct Sort
684
+ {
685
+ SortField **sort_fields;
686
+ int size;
687
+ int capa;
688
+ int start;
689
+ bool destroy_all : 1;
690
+ } Sort;
691
+
692
+ extern Sort *sort_new(void); /* takes no arguments; (void) makes this a real prototype so stray arguments are diagnosed */
693
+ extern void sort_destroy(void *p);
694
+ extern void sort_add_sort_field(Sort *self, SortField *sf);
695
+ extern void sort_clear(Sort *self);
696
+ extern char *sort_to_s(Sort *self);
697
+
698
+ /***************************************************************************
699
+ * FieldSortedHitQueue
700
+ ***************************************************************************/
701
+
702
+ extern Hit *fshq_pq_pop(PriorityQueue *pq);
703
+ extern void fshq_pq_down(PriorityQueue *pq);
704
+ extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
705
+ extern void fshq_pq_destroy(PriorityQueue *pq);
706
+ extern PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir);
707
+ extern Hit *fshq_pq_pop_fd(PriorityQueue *pq);
708
+
709
+ /***************************************************************************
710
+ * FieldDoc
711
+ ***************************************************************************/
712
+
713
+ typedef struct FieldDoc /* A Hit followed by a count and an array of Comparable values. */
714
+ {
715
+ Hit hit; /* first member, so a FieldDoc * can be converted to a Hit * */
716
+ int size;
717
+ Comparable comparables[1]; /* NOTE(review): looks like the pre-C99 trailing-array idiom with `size` as the real element count -- confirm against the allocation code; switching to a flexible array member would change sizeof(FieldDoc) */
718
+ } FieldDoc;
719
+
720
+ extern void fd_destroy(FieldDoc *fd);
721
+
722
+ /***************************************************************************
723
+ * FieldDocSortedHitQueue
724
+ ***************************************************************************/
725
+
726
+ extern bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2); /* presumably a "less than" ordering predicate over two FieldDocs -- confirm */
727
+
728
+ /***************************************************************************
729
+ *
730
+ * Searcher
731
+ *
732
+ ***************************************************************************/
733
+
734
+ typedef bool (*filter_ft)(int doc_num, float score, Searcher *self); /* callback type taking (doc_num, score, searcher) and returning bool; passed as filter_func to the Searcher search functions */
735
+
736
+ struct Searcher /* table of function pointers; every entry takes the Searcher itself as its first argument. IndexSearcher and MultiSearcher embed this struct as their first member (super). */
737
+ {
738
+ Similarity *similarity;
739
+ int (*doc_freq)(Searcher *self, const char *field,
740
+ const char *term); /* three arguments: self, field, term */
741
+ Document *(*get_doc)(Searcher *self, int doc_num);
742
+ LazyDoc *(*get_lazy_doc)(Searcher *self, int doc_num);
743
+ int (*max_doc)(Searcher *self);
744
+ Weight *(*create_weight)(Searcher *self, Query *query);
745
+ TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
746
+ int num_docs, Filter *filter, Sort *sort,
747
+ filter_ft filter_func,
748
+ bool load_fields); /* the searcher_search / searcher_search_fd macros pass false / true for load_fields */
749
+ TopDocs *(*search_w)(Searcher *self, Weight *weight, int first_doc,
750
+ int num_docs, Filter *filter, Sort *sort,
751
+ filter_ft filter_func,
752
+ bool load_fields); /* same parameters as search, but takes a Weight instead of a Query */
753
+ void (*search_each)(Searcher *self, Query *query, Filter *filter,
754
+ filter_ft filter_func,
755
+ void (*fn)(Searcher *, int, float, void *), /* presumably called per hit with (searcher, doc_num, score, arg) -- confirm */
756
+ void *arg);
757
+ void (*search_each_w)(Searcher *self, Weight *weight,
758
+ Filter *filter,
759
+ filter_ft filter_func,
760
+ void (*fn)(Searcher *, int, float, void *),
761
+ void *arg); /* same parameters as search_each, but takes a Weight instead of a Query */
762
+ Query *(*rewrite)(Searcher *self, Query *original);
763
+ Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
764
+ Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
765
+ TermVector *(*get_term_vector)(Searcher *self, const int doc_num,
766
+ const char *field);
767
+ Similarity *(*get_similarity)(Searcher *self);
768
+ void (*close)(Searcher *self);
769
+ void *arg; /* used to pass values to Searcher functions */
770
+ };
771
+
772
+ #define searcher_doc_freq(s, f, t) (s)->doc_freq(s, f, t) /* Dispatch helpers for the Searcher function-pointer table. The receiver is parenthesized so an expression argument such as &x or a + 1 binds correctly; note that s is evaluated twice, so it must be free of side effects. doc_freq is declared as (self, field, term), so this macro takes the field f as well; the former two-argument form could not compile when used. */
773
+ #define searcher_get_doc(s, dn) (s)->get_doc(s, dn)
774
+ #define searcher_get_lazy_doc(s, dn) (s)->get_lazy_doc(s, dn)
775
+ #define searcher_max_doc(s) (s)->max_doc(s)
776
+ #define searcher_rewrite(s, q) (s)->rewrite(s, q)
777
+ #define searcher_explain(s, q, dn) (s)->explain(s, q, dn)
778
+ #define searcher_explain_w(s, q, dn) (s)->explain_w(s, q, dn) /* here q is the Weight * expected by explain_w */
779
+ #define searcher_get_similarity(s) (s)->get_similarity(s)
780
+ #define searcher_close(s) (s)->close(s)
781
+ #define searcher_search(s, q, fd, nd, filt, sort, ff)\
782
+ (s)->search(s, q, fd, nd, filt, sort, ff, false) /* load_fields = false */
783
+ #define searcher_search_fd(s, q, fd, nd, filt, sort, ff)\
784
+ (s)->search(s, q, fd, nd, filt, sort, ff, true) /* load_fields = true */
785
+ #define searcher_search_each(s, q, filt, ff, fn, arg)\
786
+ (s)->search_each(s, q, filt, ff, fn, arg)
787
+ #define searcher_search_each_w(s, q, filt, ff, fn, arg)\
788
+ (s)->search_each_w(s, q, filt, ff, fn, arg) /* here q is the Weight * expected by search_each_w */
789
+
790
+
791
+ extern MatchVector *searcher_get_match_vector(Searcher *self, /* takes a query, a document number and a field name; returns a MatchVector pointer */
792
+ Query *query,
793
+ const int doc_num,
794
+ const char *field);
795
+ extern char **searcher_highlight(Searcher *self, /* returns char **; presumably an array of excerpt strings -- NOTE(review): ownership and how the array is terminated are not visible in this header */
796
+ Query *query,
797
+ const int doc_num,
798
+ const char *field,
799
+ const int excerpt_len,
800
+ const int num_excerpts,
801
+ const char *pre_tag, /* pre_tag / post_tag: presumably inserted before / after each highlighted match -- confirm */
802
+ const char *post_tag,
803
+ const char *ellipsis);
804
+
805
+ /***************************************************************************
806
+ *
807
+ * IndexSearcher
808
+ *
809
+ ***************************************************************************/
810
+
811
+ typedef struct IndexSearcher { /* a Searcher paired with an IndexReader */
812
+ Searcher super; /* first member, so a pointer to IndexSearcher can be converted to Searcher * */
813
+ IndexReader *ir;
814
+ bool close_ir : 1; /* 1-bit flag; presumably whether ir is closed together with the searcher -- confirm */
815
+ } IndexSearcher;
816
+
817
+ extern Searcher *isea_new(IndexReader *ir); /* returns the result as a Searcher * */
818
+ extern int isea_doc_freq(Searcher *self, const char *field, const char *term); /* same signature as the Searcher.doc_freq function pointer */
819
+
820
+ /***************************************************************************
821
+ *
822
+ * MultiSearcher
823
+ *
824
+ ***************************************************************************/
825
+
826
+ typedef struct MultiSearcher /* a Searcher that holds an array of other Searchers */
827
+ {
828
+ Searcher super; /* first member, so a pointer to MultiSearcher can be converted to Searcher * */
829
+ int s_cnt; /* presumably the number of entries in searchers -- confirm */
830
+ Searcher **searchers;
831
+ int *starts; /* NOTE(review): presumably per-searcher document-number offsets -- confirm */
832
+ int max_doc;
833
+ bool close_subs : 1; /* 1-bit flag; presumably whether the sub-searchers are closed with this one -- confirm */
834
+ } MultiSearcher;
835
+
836
+ extern Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs); /* parameters mirror the searchers, s_cnt and close_subs members; returns the result as a Searcher * */
837
+
838
+ /***************************************************************************
839
+ *
840
+ * QParser
841
+ *
842
+ ***************************************************************************/
843
+
844
+ #define QP_CONC_WORDS 2 /* first dimension of QParser.buf */
845
+ #define QP_MAX_CLAUSES 512 /* presumably the default value for QParser.max_clauses -- confirm */
846
+
847
+ typedef struct QParser /* query-parser state: settings, input string pointers, word buffers, field sets, analyzer, result and option flags */
848
+ {
849
+ mutex_t mutex; /* NOTE(review): presumably serializes use of a shared parser -- confirm */
850
+ int def_slop;
851
+ int max_clauses;
852
+ int phq_pos_inc;
853
+ char *qstr;
854
+ char *qstrp; /* presumably the current read position within qstr -- confirm */
855
+ char buf[QP_CONC_WORDS][MAX_WORD_SIZE]; /* QP_CONC_WORDS fixed-size buffers of MAX_WORD_SIZE bytes each; MAX_WORD_SIZE is defined elsewhere */
856
+ char *dynbuf;
857
+ int buf_index;
858
+ HashTable *field_cache;
859
+ HashSet *fields;
860
+ HashSet *fields_buf;
861
+ HashSet *def_fields; /* def_fields, all_fields, tokenized_fields and analyzer match the parameters of qp_new */
862
+ HashSet *all_fields;
863
+ HashSet *tokenized_fields;
864
+ Analyzer *analyzer;
865
+ HashTable *ts_cache;
866
+ Query *result;
867
+ TokenStream *non_tokenizer;
868
+ bool or_default : 1; /* this and the remaining members are 1-bit flags */
869
+ bool wild_lower : 1;
870
+ bool clean_str : 1;
871
+ bool handle_parse_errors : 1;
872
+ bool allow_any_fields : 1;
873
+ bool close_def_fields : 1;
874
+ bool destruct : 1;
875
+ bool recovering : 1;
876
+ bool use_keywords : 1;
877
+ } QParser;
878
+
879
+ extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields, /* NOTE(review): whether the parser takes ownership of the sets and analyzer is not visible here (see QParser.close_def_fields) */
880
+ HashSet *tokenized_fields, Analyzer *analyzer);
881
+ extern void qp_destroy(QParser *self);
882
+ extern Query *qp_parse(QParser *self, char *qstr); /* qstr is non-const; NOTE(review): check whether parsing modifies the input string */
883
+ extern char *qp_clean_str(char *str); /* NOTE(review): whether the result is a new allocation or str itself is not visible in this header */
884
+
885
+ #endif