ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/search.h ADDED
@@ -0,0 +1,1065 @@
1
+ #ifndef FRT_SEARCH_H
2
+ #define FRT_SEARCH_H
3
+
4
+ typedef struct Similarity Similarity;
5
+ typedef struct Query Query;
6
+ typedef struct Weight Weight;
7
+ typedef struct Scorer Scorer;
8
+ typedef struct Searcher Searcher;
9
+
10
+ #include "index.h"
11
+ #include "bitvector.h"
12
+
13
+ /***************************************************************************
14
+ *
15
+ * Similarity
16
+ *
17
+ ***************************************************************************/
18
+
19
+ struct Similarity {
20
+ void *data;
21
+ float norm_table[256];
22
+ float (*length_norm)(Similarity *self, char *field, int num_terms);
23
+ float (*query_norm)(Similarity *self, float sum_of_squared_weights);
24
+ float (*tf)(Similarity *self, float freq);
25
+ float (*sloppy_freq)(Similarity *self, int distance);
26
+ float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
27
+ float (*idf_phrase)(Similarity *self, Term **terms, int tcnt, Searcher *searcher);
28
+ float (*idf)(Similarity *self, int doc_freq, int num_docs);
29
+ float (*coord)(Similarity *self, int overlap, int max_overlap);
30
+ float (*decode_norm)(Similarity *self, uchar b);
31
+ float (*encode_norm)(Similarity *self, float f);
32
+ void (*destroy)(void *p);
33
+ };
34
+
35
+ #define sim_length_norm(msim, field, num_terms) msim->length_norm(msim, field, num_terms)
36
+ #define sim_query_norm(msim, sosw) msim->query_norm(msim, sosw)
37
+ #define sim_tf(msim, freq) msim->tf(msim, freq)
38
+ #define sim_sloppy_freq(msim, distance) msim->sloppy_freq(msim, distance)
39
+ #define sim_idf_term(msim, term, searcher) msim->idf_term(msim, term, searcher)
40
+ #define sim_idf_phrase(msim, terms, tcnt, searcher) msim->idf_phrase(msim, terms, tcnt, searcher)
41
+ #define sim_idf(msim, doc_freq, num_docs) msim->idf(msim, doc_freq, num_docs)
42
+ #define sim_coord(msim, overlap, max_overlap) msim->coord(msim, overlap, max_overlap)
43
+ #define sim_decode_norm(msim, b) msim->decode_norm(msim, b)
44
+ #define sim_encode_norm(msim, f) msim->encode_norm(msim, f)
45
+ #define sim_destroy(msim) msim->destroy(msim)
46
+
47
+ float byte_to_float(uchar b);
48
+ uchar float_to_byte(float f);
49
+
50
+ Similarity *sim_create_default();
51
+
52
+ /***************************************************************************
53
+ *
54
+ * Explanation
55
+ *
56
+ ***************************************************************************/
57
+
58
+ #define EXPLANATION_DETAILS_START_SIZE 4
59
+ typedef struct Explanation {
60
+ float value;
61
+ char *description;
62
+ struct Explanation **details;
63
+ int dcnt;
64
+ int dcapa;
65
+ } Explanation;
66
+
67
+ Explanation *expl_create(float value, char *description);
68
+ void expl_destoy(void *p);
69
+ Explanation *expl_add_detail(Explanation *self, Explanation *detail);
70
+ char *expl_to_s(Explanation *self, int depth);
71
+ char *expl_to_html(Explanation *self);
72
+
73
+ /***************************************************************************
74
+ *
75
+ * Hit
76
+ *
77
+ ***************************************************************************/
78
+
79
+ typedef struct Hit {
80
+ int doc;
81
+ float score;
82
+ } Hit;
83
+
84
+ bool hit_less_than(void *p1, void *p2);
85
+
86
+ /***************************************************************************
87
+ *
88
+ * TopDocs
89
+ *
90
+ ***************************************************************************/
91
+
92
+ typedef struct TopDocs {
93
+ int total_hits;
94
+ int size;
95
+ Hit **hits;
96
+ } TopDocs;
97
+
98
+ TopDocs *td_create(int total_hits, int size, Hit **hits);
99
+ void td_destroy(void *p);
100
+ char *td_to_s(TopDocs *td);
101
+
102
+ /***************************************************************************
103
+ *
104
+ * Filter
105
+ *
106
+ ***************************************************************************/
107
+
108
+ typedef struct Filter {
109
+ void *data;
110
+ char *name;
111
+ HshTable *cache;
112
+ BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
113
+ char *(*to_s)(struct Filter *self);
114
+ void (*destroy)(void *p);
115
+ } Filter;
116
+
117
+ Filter *filt_create(char *name);
118
+ char *filt_to_s(Filter *self);
119
+ BitVector *filt_get_bv(Filter *self, IndexReader *ir);
120
+ void filt_destroy(void *p);
121
+
122
+ /***************************************************************************
123
+ *
124
+ * RangeFilter
125
+ *
126
+ ***************************************************************************/
127
+
128
+ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
129
+ bool include_lower, bool include_upper);
130
+ void rfilt_destroy(void *p);
131
+
132
+ /***************************************************************************
133
+ *
134
+ * QueryFilter
135
+ *
136
+ ***************************************************************************/
137
+
138
+ typedef struct QueryFilter {
139
+ Query *query;
140
+ } QueryFilter;
141
+
142
+ Filter *qfilt_create(Query *query);
143
+
144
+
145
+ /***************************************************************************
146
+ *
147
+ * Weight
148
+ *
149
+ ***************************************************************************/
150
+
151
+ struct Weight {
152
+ void *data;
153
+ float value;
154
+ float qweight;
155
+ float qnorm;
156
+ float idf;
157
+ Query *query;
158
+ Similarity *similarity;
159
+ Query *(*get_query)(Weight *self);
160
+ float (*get_value)(Weight *self);
161
+ void (*normalize)(Weight *self, float normalization_factor);
162
+ Scorer *(*scorer)(Weight *self, IndexReader *ir);
163
+ Explanation *(*explain)(Weight *self, IndexReader *ir, int doc_num);
164
+ float (*sum_of_squared_weights)(Weight *self);
165
+ char *(*to_s)(Weight *self);
166
+ void (*destroy)(void *p);
167
+ };
168
+
169
+ Query *w_get_query(Weight *self);
170
+ float w_get_value(Weight *self);
171
+ float w_sum_of_squared_weights(Weight *self);
172
+ void w_normalize(Weight *self, float normalization_factor);
173
+
174
+ /***************************************************************************
175
+ *
176
+ * TermWeight
177
+ *
178
+ ***************************************************************************/
179
+
180
+ Weight *tw_create(Query *query, Searcher *searcher);
181
+
182
+ /***************************************************************************
183
+ *
184
+ * BooleanWeight
185
+ *
186
+ ***************************************************************************/
187
+
188
+ typedef struct BooleanWeight {
189
+ Weight **weights;
190
+ int w_cnt;
191
+ } BooleanWeight;
192
+ Weight *bw_create(Query *query, Searcher *searcher);
193
+
194
+ /***************************************************************************
195
+ *
196
+ * PhraseWeight
197
+ *
198
+ ***************************************************************************/
199
+
200
+ Weight *phw_create(Query *query, Searcher *searcher);
201
+
202
+ /***************************************************************************
203
+ *
204
+ * ConstantScoreWeight
205
+ *
206
+ ***************************************************************************/
207
+
208
+ Weight *csw_create(Query *query, Searcher *searcher);
209
+
210
+ /***************************************************************************
211
+ *
212
+ * MatchAllWeight
213
+ *
214
+ ***************************************************************************/
215
+
216
+ Weight *maw_create(Query *query, Searcher *searcher);
217
+
218
+ /***************************************************************************
219
+ *
220
+ * SpanWeight
221
+ *
222
+ ***************************************************************************/
223
+
224
+ Weight *spanw_create(Query *query, Searcher *searcher);
225
+
226
+ /***************************************************************************
227
+ *
228
+ * Query
229
+ *
230
+ ***************************************************************************/
231
+
232
+ enum QUERY_TYPE {
233
+ TERM_QUERY,
234
+ BOOLEAN_QUERY,
235
+ PHRASE_QUERY,
236
+ MULTI_PHRASE_QUERY,
237
+ CONSTANT_QUERY,
238
+ MATCH_ALL_QUERY,
239
+ RANGE_QUERY,
240
+ WILD_CARD_QUERY,
241
+ FUZZY_QUERY,
242
+ PREFIX_QUERY,
243
+ SPAN_TERM_QUERY,
244
+ SPAN_FIRST_QUERY,
245
+ SPAN_OR_QUERY,
246
+ SPAN_NOT_QUERY,
247
+ SPAN_NEAR_QUERY
248
+ };
249
+
250
+ struct Query {
251
+ bool destroy_all : 1;
252
+ uchar type;
253
+ void *data;
254
+ float boost;
255
+ float original_boost;
256
+ Weight *weight;
257
+ Query *rewritten;
258
+ Weight *(*create_weight)(Query *self, Searcher *searcher);
259
+ Query *(*rewrite)(Query *self, IndexReader *ir);
260
+ void (*extract_terms)(Query *self, Array *terms);
261
+ Similarity *(*get_similarity)(Query *self, Searcher *searcher);
262
+ char *(*to_s)(Query *self, char *field);
263
+ void (*destroy)(void *p);
264
+ };
265
+
266
+ Weight *q_weight(Query *self, Searcher *searcher);
267
+ void q_destroy(Query *self);
268
+ Similarity *q_get_similarity(Query *self, Searcher *searcher);
269
+ void q_extract_terms(Query *self, Array *terms);
270
+ Query *q_create();
271
+
272
+ /***************************************************************************
273
+ *
274
+ * TermQuery
275
+ *
276
+ ***************************************************************************/
277
+
278
+ typedef struct TermQuery {
279
+ Term *term;
280
+ } TermQuery;
281
+
282
+ Query *tq_create(Term *term);
283
+
284
+ /***************************************************************************
285
+ *
286
+ * BooleanQuery
287
+ *
288
+ ***************************************************************************/
289
+
290
+ /***************************************************************************
291
+ * BooleanClause
292
+ ***************************************************************************/
293
+
294
+ enum BC_TYPE {
295
+ BC_SHOULD,
296
+ BC_MUST,
297
+ BC_MUST_NOT
298
+ };
299
+
300
+ typedef struct BooleanClause {
301
+ Query *query;
302
+ Query *rewritten;
303
+ unsigned int occur : 4;
304
+ bool is_prohibited : 1;
305
+ bool is_required : 1;
306
+ } BooleanClause;
307
+
308
+ BooleanClause *bc_create(Query *query, unsigned int occur);
309
+ void bc_set_occur(BooleanClause *self, unsigned int occur);
310
+
311
+ /***************************************************************************
312
+ * BooleanQuery
313
+ ***************************************************************************/
314
+
315
+ #define DEFAULT_MAX_CLAUSE_COUNT 1024
316
+ #define BOOLEAN_CLAUSES_START_CAPA 4
317
+ #define QUERY_STRING_START_SIZE 64
318
+
319
+ typedef struct BooleanQuery {
320
+ bool coord_disabled;
321
+ int max_clause_cnt;
322
+ int clause_cnt;
323
+ int clause_capa;
324
+ BooleanClause **clauses;
325
+ Similarity *similarity;
326
+ } BooleanQuery;
327
+
328
+ Query *bq_create(bool coord_disabled);
329
+ void bq_add_query(Query *self, Query *sub_query, unsigned int occur);
330
+
331
+ /***************************************************************************
332
+ *
333
+ * PhraseQuery
334
+ *
335
+ ***************************************************************************/
336
+
337
+ #define PHQ_INIT_CAPA 4
338
+ typedef struct PhraseQuery {
339
+ int slop;
340
+ Term **terms;
341
+ int *positions;
342
+ int t_cnt;
343
+ int t_capa;
344
+ char *field;
345
+ } PhraseQuery;
346
+
347
+ Query *phq_create();
348
+ void phq_add_term(Query *self, Term *term, int pos_inc);
349
+
350
+ /***************************************************************************
351
+ *
352
+ * MultiPhraseQuery
353
+ *
354
+ ***************************************************************************/
355
+
356
+ typedef struct MultiPhraseQuery {
357
+ int slop;
358
+ Term ***terms;
359
+ int *positions;
360
+ int *pt_cnt;
361
+ int t_cnt;
362
+ int t_capa;
363
+ char *field;
364
+ } MultiPhraseQuery;
365
+
366
+ Query *mphq_create();
367
+ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc);
368
+
369
+ /***************************************************************************
370
+ *
371
+ * PrefixQuery
372
+ *
373
+ ***************************************************************************/
374
+
375
+ Query *prefixq_create(Term *prefix);
376
+
377
+ /***************************************************************************
378
+ *
379
+ * WildCardQuery
380
+ *
381
+ ***************************************************************************/
382
+
383
+ #define WILD_CHAR '?'
384
+ #define WILD_STRING '*'
385
+ Query *wcq_create(Term *term);
386
+ bool wc_match(char *pattern, char *text);
387
+
388
+ /***************************************************************************
389
+ *
390
+ * FuzzyQuery
391
+ *
392
+ ***************************************************************************/
393
+
394
+ #define DEF_MIN_SIM 0.5
395
+ #define DEF_PRE_LEN 0
396
+ #define TYPICAL_LONGEST_WORD 20
397
+
398
+ typedef struct FuzzyQuery {
399
+ Term *term;
400
+ char *text; /* term text after prefix */
401
+ int text_len;
402
+ int pre_len;
403
+ float min_sim;
404
+ float scale_factor;
405
+ int max_distances[TYPICAL_LONGEST_WORD];
406
+ int *da;
407
+ int da_capa;
408
+ } FuzzyQuery;
409
+
410
+ Query *fuzq_create(Term *term);
411
+ Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
412
+
413
+ /***************************************************************************
414
+ *
415
+ * ConstantScoreQuery
416
+ *
417
+ ***************************************************************************/
418
+
419
+ Query *csq_create(Filter *filter);
420
+
421
+ /***************************************************************************
422
+ *
423
+ * MatchAllQuery
424
+ *
425
+ ***************************************************************************/
426
+
427
+ Query *maq_create();
428
+
429
+ /***************************************************************************
430
+ *
431
+ * ConstantScoreQuery
432
+ *
433
+ ***************************************************************************/
434
+
435
+ Query *maq_create();
436
+
437
+ /***************************************************************************
438
+ *
439
+ * RangeQuery
440
+ *
441
+ ***************************************************************************/
442
+
443
+ typedef struct Range {
444
+ char *field;
445
+ char *lower_term;
446
+ char *upper_term;
447
+ bool include_lower;
448
+ bool include_upper;
449
+ } Range;
450
+
451
+ Query *rq_create(const char *field, char *lower_term, char *upper_term,
452
+ bool include_lower, bool include_upper);
453
+ Query *rq_create_less(const char *field, char *upper_term, bool include_upper);
454
+ Query *rq_create_more(const char *field, char *lower_term, bool include_lower);
455
+
456
+ /***************************************************************************
457
+ *
458
+ * SpanQuery
459
+ *
460
+ ***************************************************************************/
461
+
462
+ /***************************************************************************
463
+ * SpanEnum
464
+ ***************************************************************************/
465
+
466
+ typedef struct SpanEnum SpanEnum;
467
+ struct SpanEnum {
468
+ void *data;
469
+ Query *query;
470
+ bool (*next)(SpanEnum *self);
471
+ bool (*skip_to)(SpanEnum *self, int target_doc);
472
+ int (*doc)(SpanEnum *self);
473
+ int (*start)(SpanEnum *self);
474
+ int (*end)(SpanEnum *self);
475
+ char *(*to_s)(SpanEnum *self);
476
+ void (*destroy)(void *p);
477
+ };
478
+
479
+ /***************************************************************************
480
+ * SpanTermEnum
481
+ ***************************************************************************/
482
+
483
+ typedef struct SpanTermEnum SpanTermEnum;
484
+ struct SpanTermEnum {
485
+ TermDocEnum *positions;
486
+ int position;
487
+ int doc;
488
+ int count;
489
+ int freq;
490
+ };
491
+
492
+ SpanEnum *spante_create(Query *query, IndexReader *ir);
493
+
494
+ /***************************************************************************
495
+ * SpanFirstEnum
496
+ ***************************************************************************/
497
+
498
+ SpanEnum *spanfe_create(Query *query, IndexReader *ir);
499
+
500
+ /***************************************************************************
501
+ * SpanOrEnum
502
+ ***************************************************************************/
503
+
504
+ typedef struct SpanOrEnum {
505
+ PriorityQueue *queue;
506
+ SpanEnum **span_enums;
507
+ int s_cnt;
508
+ bool first_time;
509
+ } SpanOrEnum;
510
+ SpanEnum *spanoe_create(Query *query, IndexReader *ir);
511
+
512
+ /***************************************************************************
513
+ * SpanEnumCell
514
+ ***************************************************************************/
515
+
516
+ typedef struct SpanEnumCell {
517
+ SpanEnum *parent;
518
+ SpanEnum *se;
519
+ int index;
520
+ int length;
521
+ } SpanEnumCell;
522
+ SpanEnum *spanec_create(Query *parent, Query *child, int index);
523
+
524
+ /***************************************************************************
525
+ * SpanNearEnum
526
+ ***************************************************************************/
527
+
528
+ typedef struct SpanNearEnum {
529
+ SpanEnum **span_enums;
530
+ int s_cnt;
531
+ int slop;
532
+ int current;
533
+ bool first_time : 1;
534
+ bool in_order : 1;
535
+ int doc;
536
+ int start;
537
+ int end;
538
+ } SpanNearEnum;
539
+
540
+ SpanEnum *spanne_create(Query *query, IndexReader *ir);
541
+
542
+ /***************************************************************************
543
+ * SpanNotEnum
544
+ ***************************************************************************/
545
+
546
+ typedef struct SpanNotEnum {
547
+ SpanEnum *inc;
548
+ SpanEnum *exc;
549
+ bool more_inc : 1;
550
+ bool more_exc : 1;
551
+ } SpanNotEnum;
552
+
553
+ SpanEnum *spanxe_create(Query *query, IndexReader *ir);
554
+
555
+ /***************************************************************************
556
+ * SpanQuery
557
+ ***************************************************************************/
558
+
559
+ typedef struct SpanQuery SpanQuery;
560
+ struct SpanQuery {
561
+ void *data;
562
+ char *field;
563
+ SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
564
+ Array *(*get_terms)(Query *self);
565
+ };
566
+
567
+ /***************************************************************************
568
+ * SpanTermQuery
569
+ ***************************************************************************/
570
+
571
+ Query *spantq_create(Term *term);
572
+
573
+ /***************************************************************************
574
+ * SpanFirstQuery
575
+ ***************************************************************************/
576
+
577
+ typedef struct SpanFirstQuery {
578
+ int end;
579
+ Query *match;
580
+ } SpanFirstQuery;
581
+
582
+ Query *spanfq_create(Query *match, int end);
583
+
584
+ /***************************************************************************
585
+ * SpanOrQuery
586
+ ***************************************************************************/
587
+
588
+ typedef struct SpanOrQuery {
589
+ Query **clauses;
590
+ int c_cnt;
591
+ } SpanOrQuery;
592
+
593
+ Query *spanoq_create(Query **clauses, int c_cnt);
594
+
595
+ /***************************************************************************
596
+ * SpanNearQuery
597
+ ***************************************************************************/
598
+
599
+ typedef struct SpanNearQuery {
600
+ Query **clauses;
601
+ int c_cnt;
602
+ int slop;
603
+ bool in_order;
604
+ } SpanNearQuery;
605
+
606
+ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order);
607
+
608
+
609
+ /***************************************************************************
610
+ * SpanNotQuery
611
+ ***************************************************************************/
612
+
613
+ typedef struct SpanNotQuery {
614
+ Query *inc;
615
+ Query *exc;
616
+ } SpanNotQuery;
617
+
618
+ Query *spanxq_create(Query *inc, Query *exc);
619
+
620
+ /***************************************************************************
621
+ *
622
+ * Scorer
623
+ *
624
+ ***************************************************************************/
625
+
626
+ #define SCORER_NULLIFY(mscorer) mscorer->destroy(mscorer); mscorer = NULL
627
+
628
+ struct Scorer {
629
+ void *data;
630
+ Similarity *similarity;
631
+ int doc;
632
+ float (*score)(Scorer *self);
633
+ bool (*next)(Scorer *self);
634
+ bool (*skip_to)(Scorer *self, int doc_num);
635
+ Explanation *(*explain)(Scorer *self, int doc_num);
636
+ void (*destroy)(void *p);
637
+ };
638
+
639
+ void scorer_destroy(void *p);
640
+ Scorer *scorer_create(Similarity *similarity);
641
+ bool scorer_less_than(void *p1, void *p2);
642
+ bool scorer_doc_less_than(void *p1, void *p2);
643
+ int scorer_doc_cmp(const void *p1, const void *p2);
644
+
645
+ /***************************************************************************
646
+ *
647
+ * TermScorer
648
+ *
649
+ ***************************************************************************/
650
+
651
+ #define SCORE_CACHE_SIZE 32
652
+ #define TDE_READ_SIZE 32
653
+
654
+ typedef struct TermScorer {
655
+ int docs[TDE_READ_SIZE];
656
+ int freqs[TDE_READ_SIZE];
657
+ int pointer;
658
+ int pointer_max;
659
+ float score_cache[SCORE_CACHE_SIZE];
660
+ Weight *weight;
661
+ TermDocEnum *tde;
662
+ uchar *norms;
663
+ float weight_value;
664
+ } TermScorer;
665
+
666
+ Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
667
+
668
+ /***************************************************************************
669
+ *
670
+ * BooleanScorer
671
+ *
672
+ ***************************************************************************/
673
+
674
+ /***************************************************************************
675
+ * Coordinator
676
+ ***************************************************************************/
677
+
678
+ typedef struct Coordinator {
679
+ int max_coord;
680
+ float *coord_factors;
681
+ Similarity *similarity;
682
+ int num_matches;
683
+ } Coordinator;
684
+
685
+ Coordinator *coo_create(Similarity *similarity);
686
+ Coordinator *coo_init(Coordinator *self);
687
+
688
+ /***************************************************************************
689
+ * DisjunctionSumScorer
690
+ ***************************************************************************/
691
+
692
+ typedef struct DisjunctionSumScorer{
693
+ float cum_score;
694
+ int num_matches;
695
+ int min_num_matches;
696
+ Scorer **sub_scorers;
697
+ int ss_cnt;
698
+ PriorityQueue *scorer_queue;
699
+ Coordinator *coordinator;
700
+ } DisjunctionSumScorer;
701
+
702
+ /***************************************************************************
703
+ * ConjunctionScorer
704
+ ***************************************************************************/
705
+
706
+ typedef struct ConjunctionScorer{
707
+ bool first_time : 1;
708
+ bool more : 1;
709
+ int coord;
710
+ int ss_cnt;
711
+ int ss_capa;
712
+ Scorer **sub_scorers;
713
+ int first;
714
+ int last;
715
+ Coordinator *coordinator;
716
+ int last_scored_doc;
717
+ } ConjunctionScorer;
718
+
719
+ /***************************************************************************
720
+ * SingleMatchScorer
721
+ ***************************************************************************/
722
+
723
+ typedef struct SingleMatchScorer {
724
+ Coordinator *coordinator;
725
+ Scorer *scorer;
726
+ } SingleMatchScorer;
727
+
728
+ /***************************************************************************
729
+ * ReqOptSumScorer
730
+ ***************************************************************************/
731
+
732
+ typedef struct ReqOptSumScorer {
733
+ Scorer *req_scorer;
734
+ Scorer *opt_scorer;
735
+ bool first_time_opt;
736
+ } ReqOptSumScorer;
737
+
738
+ /***************************************************************************
739
+ * ReqExclScorer
740
+ ***************************************************************************/
741
+
742
+ typedef struct ReqExclScorer {
743
+ Scorer *req_scorer;
744
+ Scorer *excl_scorer;
745
+ bool first_time;
746
+ } ReqExclScorer;
747
+
748
+ /***************************************************************************
749
+ * BooleanScorer
750
+ ***************************************************************************/
751
+
752
+ typedef struct BooleanScorer {
753
+ Scorer **required_scorers;
754
+ int rs_cnt;
755
+ int rs_capa;
756
+ Scorer **optional_scorers;
757
+ int os_cnt;
758
+ int os_capa;
759
+ Scorer **prohibited_scorers;
760
+ int ps_cnt;
761
+ int ps_capa;
762
+ Scorer *counting_sum_scorer;
763
+ Coordinator *coordinator;
764
+ } BooleanScorer;
765
+
766
+ Scorer *bsc_create(Similarity *similarity);
767
+ void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
768
+
769
+ /***************************************************************************
770
+ *
771
+ * PhraseScorer
772
+ *
773
+ ***************************************************************************/
774
+
775
+ /***************************************************************************
776
+ * PhrasePosition
777
+ ***************************************************************************/
778
+ typedef struct PhrasePosition {
779
+ TermDocEnum *tpe;
780
+ int offset;
781
+ int count;
782
+ int doc;
783
+ int position;
784
+ } PhrasePosition;
785
+
786
+ PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
787
+ /***************************************************************************
788
+ * PhraseScorer
789
+ ***************************************************************************/
790
+
791
+ typedef struct PhraseScorer {
792
+ float freq;
793
+ uchar *norms;
794
+ float value;
795
+ Weight *weight;
796
+ bool first_time : 1;
797
+ bool more : 1;
798
+ int pp_first;
799
+ int pp_last;
800
+ int pp_cnt;
801
+ PhrasePosition **phrase_pos;
802
+ float (*phrase_freq)(Scorer *self);
803
+ int slop;
804
+ } PhraseScorer;
805
+
806
+ Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
807
+ int *positions, int t_cnt, Similarity *similarity, uchar *norms);
808
+
809
+ /***************************************************************************
810
+ * ExactPhraseScorer
811
+ ***************************************************************************/
812
+
813
+ Scorer *exact_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
814
+ int *positions, int t_cnt, Similarity *similarity, uchar *norms);
815
+
816
+ /***************************************************************************
817
+ * SloppyPhraseScorer
818
+ ***************************************************************************/
819
+
820
+ Scorer *sloppy_phrase_scorer_create(Weight *weight, TermDocEnum **term_pos_enum,
821
+ int *positions, int t_cnt, Similarity *similarity, int slop, uchar *norms);
822
+
823
+ /***************************************************************************
824
+ *
825
+ * ConstantScoreScorer
826
+ *
827
+ ***************************************************************************/
828
+
829
+ typedef struct ConstantScoreScorer {
830
+ BitVector *bv;
831
+ float score;
832
+ } ConstantScoreScorer;
833
+
834
+ Scorer *cssc_create(Weight *weight, IndexReader *ir);
835
+
836
+
837
+ /***************************************************************************
838
+ *
839
+ * MatchAllScorer
840
+ *
841
+ ***************************************************************************/
842
+
843
+ typedef struct MatchAllScorer {
844
+ IndexReader *ir;
845
+ int max_doc;
846
+ float score;
847
+ } MatchAllScorer;
848
+
849
+ Scorer *masc_create(Weight *weight, IndexReader *ir);
850
+
851
+
852
+ /***************************************************************************
853
+ *
854
+ * SpanScorer
855
+ *
856
+ ***************************************************************************/
857
+
858
+ typedef struct SpanScorer {
859
+ bool first_time : 1;
860
+ bool more : 1;
861
+ IndexReader *ir;
862
+ SpanEnum *spans;
863
+ Similarity *sim;
864
+ uchar *norms;
865
+ Weight *weight;
866
+ float value;
867
+ float freq;
868
+ } SpanScorer;
869
+
870
+ Scorer *spansc_create(Weight *weight, IndexReader *ir);
871
+
872
+ /***************************************************************************
873
+ *
874
+ * Sort
875
+ *
876
+ ***************************************************************************/
877
+
878
+ enum SORT_TYPE {
879
+ SORT_TYPE_SCORE,
880
+ SORT_TYPE_DOC,
881
+ SORT_TYPE_INTEGER,
882
+ SORT_TYPE_FLOAT,
883
+ SORT_TYPE_STRING,
884
+ SORT_TYPE_AUTO
885
+ };
886
+
887
+ /***************************************************************************
888
+ * SortField
889
+ ***************************************************************************/
890
+
891
+ typedef struct SortField {
892
+ mutex_t mutex;
893
+ char *field;
894
+ int type;
895
+ bool reverse : 1;
896
+ void *index;
897
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
898
+ void *(*create_index)(int size);
899
+ void (*destroy_index)(void *p);
900
+ void (*handle_term)(void *index, TermDocEnum *tde, char *text);
901
+ } SortField;
902
+
903
+ SortField *sort_field_create(char *field, int type, bool reverse);
904
+ SortField *sort_field_score_create(bool reverse);
905
+ SortField *sort_field_doc_create(bool reverse);
906
+ SortField *sort_field_int_create(char *field, bool reverse);
907
+ SortField *sort_field_float_create(char *field, bool reverse);
908
+ SortField *sort_field_string_create(char *field, bool reverse);
909
+ SortField *sort_field_auto_create(char *field, bool reverse);
910
+ void sort_field_destroy(void *p);
911
+
912
+ extern SortField SORT_FIELD_SCORE;
913
+ extern SortField SORT_FIELD_SCORE_REV;
914
+ extern SortField SORT_FIELD_DOC;
915
+ extern SortField SORT_FIELD_DOC_REV;
916
+
917
+ /***************************************************************************
918
+ * Sort
919
+ ***************************************************************************/
920
+
921
+ typedef struct Sort {
922
+ SortField **sort_fields;
923
+ int sf_cnt;
924
+ int sf_capa;
925
+ bool destroy_all : 1;
926
+ } Sort;
927
+
928
+ Sort *sort_create();
929
+ void sort_destroy(void *p);
930
+ void sort_add_sort_field(Sort *self, SortField *sf);
931
+ void sort_clear(Sort *self);
932
+
933
+ /***************************************************************************
934
+ * FieldSortedHitQueue
935
+ ***************************************************************************/
936
+
937
+ Hit *fshq_pq_pop(PriorityQueue *pq);
938
+ void fshq_pq_down(PriorityQueue *pq);
939
+ void fshq_pq_push(PriorityQueue *pq, void *elem);
940
+ void fshq_pq_destroy(void *p);
941
+ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
942
+
943
+ /***************************************************************************
944
+ *
945
+ * Searcher
946
+ *
947
+ ***************************************************************************/
948
+
949
+ struct Searcher {
950
+ IndexReader *ir;
951
+ Similarity *similarity;
952
+ int (*doc_freq)(Searcher *self, Term *term);
953
+ int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
954
+ Document *(*get_doc)(Searcher *self, int doc_num);
955
+ int (*max_doc)(Searcher *self);
956
+ Weight *(*create_weight)(Searcher *self, Query *query);
957
+ TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
958
+ int num_docs, Filter *filter, Sort *sort);
959
+ Query *(*rewrite)(Searcher *self, Query *original);
960
+ Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
961
+ Similarity *(*get_similarity)(Searcher *self);
962
+ void (*close)(Searcher *self);
963
+ };
964
+
965
+ Searcher *sea_create(IndexReader *ir);
966
+ TopDocs *sea_search(Searcher *self, Query *query, int first_doc,
967
+ int num_docs, Filter *filter, Sort *sort);
968
+ void sea_search_each(Searcher *self, Query *query, Filter *filter,
969
+ void (*fn)(Searcher *self, int doc_num, void *arg), void *arg);
970
+ Explanation *sea_explain(Searcher *self, Query *query, int doc_num);
971
+ Similarity *sea_get_similarity(Searcher *self);
972
+ Query *sea_rewrite(Searcher *self, Query *original);
973
+ void sea_close(Searcher *self);
974
+ Document *sea_get_doc(Searcher *self, int doc_num);
975
+ Weight *sea_create_weight(Searcher *self, Query *query);
976
+ int sea_doc_freq(Searcher *self, Term *term);
977
+
978
+ /***************************************************************************
979
+ *
980
+ * QParser
981
+ *
982
+ ***************************************************************************/
983
+
984
+ #define CONC_WORDS 2
985
+
986
+ typedef struct QParser {
987
+ mutex_t mutex;
988
+ bool or_default : 1;
989
+ bool wild_lower : 1;
990
+ bool clean_str : 1;
991
+ bool handle_parse_errors : 1;
992
+ bool allow_any_fields : 1;
993
+ bool close_def_fields : 1;
994
+ int def_slop;
995
+ char *qstr;
996
+ char *qstrp;
997
+ char buf[CONC_WORDS][MAX_WORD_SIZE];
998
+ int buf_index;
999
+ HashSet *fields;
1000
+ HashSet *fields_buf;
1001
+ HashSet *def_fields;
1002
+ HashSet *all_fields;
1003
+ Analyzer *analyzer;
1004
+ Query *result;
1005
+ } QParser;
1006
+
1007
+ QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer);
1008
+ void qp_destroy(void *p);
1009
+ Query *qp_parse(QParser *self, char *qstr);
1010
+ char *qp_clean_str(char *str);
1011
+
1012
+ /***************************************************************************
1013
+ *
1014
+ * Index
1015
+ *
1016
+ ***************************************************************************/
1017
+
1018
+ typedef struct Index {
1019
+ mutex_t mutex;
1020
+ Store *store;
1021
+ Analyzer *analyzer;
1022
+ IndexReader *ir;
1023
+ IndexWriter *iw;
1024
+ Searcher *sea;
1025
+ QParser *qp;
1026
+ HashSet *key;
1027
+ char *id_field;
1028
+ char *def_field;
1029
+ bool close_analyzer : 1;
1030
+ bool close_store : 1;
1031
+ /* for IndexWriter */
1032
+ bool use_compound_file : 1;
1033
+ bool auto_flush : 1;
1034
+ bool has_writes : 1;
1035
+ } Index;
1036
+
1037
+ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
1038
+ bool create);
1039
+ void index_destroy(Index *self);
1040
+ void index_flush(Index *self);
1041
+ int index_size(Index *self);
1042
+ void index_optimize(Index *self);
1043
+ bool index_has_del(Index *self);
1044
+ bool index_is_deleted(Index *self, int doc_num);
1045
+ void index_add_doc(Index *self, Document *doc);
1046
+ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
1047
+ void index_add_string(Index *self, char *str, Analyzer *analyzer);
1048
+ void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
1049
+ TopDocs *index_search_str(Index *self, char *query, int first_doc,
1050
+ int num_docs, Filter *filter, Sort *sort);
1051
+ Query *index_get_query(Index *self, char *qstr);
1052
+ Document *index_get_doc(Index *self, int doc_num);
1053
+ Document *index_get_doc_ts(Index *self, int doc_num);
1054
+ Document *index_get_doc_id(Index *self, char *id);
1055
+ Document *index_get_doc_term(Index *self, Term *term);
1056
+ void index_delete(Index *self, int doc_num);
1057
+ void index_delete_term(Index *self, Term *term);
1058
+ void index_delete_id(Index *self, char *id);
1059
+ void index_delete_query(Index *self, Query *q, Filter *f);
1060
+ void index_delete_query_str(Index *self, char *qstr, Filter *f);
1061
+ int index_term_id(Index *self, Term *term);
1062
+ Explanation *index_explain(Index *self, Query *q, int doc_num);
1063
+ void index_auto_flush_ir(Index *self);
1064
+ void index_auto_flush_iw(Index *self);
1065
+ #endif