ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/search.h CHANGED
@@ -9,9 +9,6 @@ typedef struct Scorer Scorer;
9
9
  #include "bitvector.h"
10
10
  #include "similarity.h"
11
11
 
12
- #define term_set_create() \
13
- hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
14
-
15
12
  /***************************************************************************
16
13
  *
17
14
  * Explanation
@@ -24,15 +21,45 @@ typedef struct Explanation
24
21
  float value;
25
22
  char *description;
26
23
  struct Explanation **details;
27
- int dcnt;
28
- int dcapa;
29
24
  } Explanation;
25
+
26
+ extern Explanation *expl_new(float value, const char *description, ...);
27
+ extern void expl_destroy(Explanation *expl);
28
+ extern Explanation *expl_add_detail(Explanation *expl, Explanation *detail);
29
+ extern char *expl_to_s_depth(Explanation *expl, int depth);
30
+ extern char *expl_to_html(Explanation *expl);
30
31
 
31
- extern Explanation *expl_create(float value, char *description);
32
- extern void expl_destoy(void *p);
33
- extern Explanation *expl_add_detail(Explanation *self, Explanation *detail);
34
- extern char *expl_to_s(Explanation *self, int depth);
35
- extern char *expl_to_html(Explanation *self);
32
+ #define expl_to_s(expl) expl_to_s_depth(expl, 0)
33
+
34
+ /***************************************************************************
35
+ *
36
+ * Highlighter
37
+ *
38
+ ***************************************************************************/
39
+
40
+ typedef struct MatchRange
41
+ {
42
+ int start;
43
+ int end;
44
+ int start_offset;
45
+ int end_offset;
46
+ double score;
47
+ } MatchRange;
48
+
49
+ #define MATCH_VECTOR_INIT_CAPA 8
50
+ typedef struct MatchVector
51
+ {
52
+ int size;
53
+ int capa;
54
+ MatchRange *matches;
55
+ } MatchVector;
56
+
57
+ extern MatchVector *matchv_new();
58
+ extern MatchVector *matchv_add(MatchVector *mp, int start, int end);
59
+ extern MatchVector *matchv_sort(MatchVector *self);
60
+ extern void matchv_destroy(MatchVector *self);
61
+ extern MatchVector *matchv_compact(MatchVector *self);
62
+ extern MatchVector *matchv_compact_with_breaks(MatchVector *self);
36
63
 
37
64
  /***************************************************************************
38
65
  *
@@ -46,8 +73,6 @@ typedef struct Hit
46
73
  float score;
47
74
  } Hit;
48
75
 
49
- extern bool hit_less_than(void *p1, void *p2);
50
-
51
76
  /***************************************************************************
52
77
  *
53
78
  * TopDocs
@@ -59,9 +84,10 @@ typedef struct TopDocs
59
84
  int total_hits;
60
85
  int size;
61
86
  Hit **hits;
87
+ float max_score;
62
88
  } TopDocs;
63
89
 
64
- extern TopDocs *td_create(int total_hits, int size, Hit **hits);
90
+ extern TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score);
65
91
  extern void td_destroy(TopDocs *td);
66
92
  extern char *td_to_s(TopDocs *td);
67
93
 
@@ -73,22 +99,23 @@ extern char *td_to_s(TopDocs *td);
73
99
 
74
100
  typedef struct Filter
75
101
  {
76
- void *data;
77
- char *name;
78
- HshTable *cache;
79
- BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
80
- char *(*to_s)(struct Filter *self);
81
- uint (*hash)(struct Filter *self);
82
- int (*eq)(struct Filter *self, struct Filter *o);
83
- void (*destroy)(struct Filter *self);
102
+ char *name;
103
+ HashTable *cache;
104
+ BitVector *(*get_bv_i)(struct Filter *self, IndexReader *ir);
105
+ char *(*to_s)(struct Filter *self);
106
+ ulong (*hash)(struct Filter *self);
107
+ int (*eq)(struct Filter *self, struct Filter *o);
108
+ void (*destroy_i)(struct Filter *self);
109
+ int ref_cnt;
84
110
  } Filter;
85
111
 
86
- extern Filter *filt_create(char *name);
87
- extern char *filt_to_s_i(Filter *self);
88
- extern BitVector *filt_get_bv(Filter *self, IndexReader *ir);
89
- extern void filt_destroy(Filter *self);
90
- extern uint filt_hash(Filter *self);
91
- extern int filt_eq(Filter *self, Filter *o);
112
+ #define filt_new(type) filt_create(sizeof(type), #type)
113
+ extern Filter *filt_create(size_t size, const char *name);
114
+ extern BitVector *filt_get_bv(Filter *filt, IndexReader *ir);
115
+ extern void filt_destroy_i(Filter *filt);
116
+ extern void filt_deref(Filter *filt);
117
+ extern ulong filt_hash(Filter *filt);
118
+ extern int filt_eq(Filter *filt, Filter *o);
92
119
 
93
120
  /***************************************************************************
94
121
  *
@@ -96,8 +123,9 @@ extern int filt_eq(Filter *self, Filter *o);
96
123
  *
97
124
  ***************************************************************************/
98
125
 
99
- extern Filter *rfilt_create(const char *field, char *lower_term,
100
- char *upper_term, bool include_lower, bool include_upper);
126
+ extern Filter *rfilt_new(const char *field,
127
+ const char *lower_term, const char *upper_term,
128
+ bool include_lower, bool include_upper);
101
129
 
102
130
  /***************************************************************************
103
131
  *
@@ -105,12 +133,8 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
105
133
  *
106
134
  ***************************************************************************/
107
135
 
108
- typedef struct QueryFilter
109
- {
110
- Query *query;
111
- } QueryFilter;
112
-
113
- extern Filter *qfilt_create(Query *query);
136
+ extern Filter *qfilt_new(Query *query);
137
+ extern Filter *qfilt_new_nr(Query *query);
114
138
 
115
139
  /***************************************************************************
116
140
  *
@@ -120,12 +144,11 @@ extern Filter *qfilt_create(Query *query);
120
144
 
121
145
  struct Weight
122
146
  {
123
- void *data;
124
- float value;
125
- float qweight;
126
- float qnorm;
127
- float idf;
128
- Query *query;
147
+ float value;
148
+ float qweight;
149
+ float qnorm;
150
+ float idf;
151
+ Query *query;
129
152
  Similarity *similarity;
130
153
  Query *(*get_query)(Weight *self);
131
154
  float (*get_value)(Weight *self);
@@ -137,78 +160,26 @@ struct Weight
137
160
  void (*destroy)(Weight *self);
138
161
  };
139
162
 
140
- extern Weight *w_create(Query *query);
163
+ #define w_new(type, query) w_create(sizeof(type), query)
164
+ extern Weight *w_create(size_t size, Query *query);
141
165
  extern void w_destroy(Weight *self);
142
-
143
166
  extern Query *w_get_query(Weight *self);
144
167
  extern float w_get_value(Weight *self);
145
168
  extern float w_sum_of_squared_weights(Weight *self);
146
169
  extern void w_normalize(Weight *self, float normalization_factor);
147
170
 
148
- /***************************************************************************
149
- *
150
- * TermWeight
151
- *
152
- ***************************************************************************/
153
-
154
- extern Weight *tw_create(Query *query, Searcher *searcher);
155
-
156
- /***************************************************************************
157
- *
158
- * BooleanWeight
159
- *
160
- ***************************************************************************/
161
-
162
- typedef struct BooleanWeight {
163
- Weight **weights;
164
- int w_cnt;
165
- } BooleanWeight;
166
-
167
- extern Weight *bw_create(Query *query, Searcher *searcher);
168
-
169
- /***************************************************************************
170
- *
171
- * PhraseWeight
172
- *
173
- ***************************************************************************/
174
-
175
- extern Weight *phw_create(Query *query, Searcher *searcher);
176
-
177
- /***************************************************************************
178
- *
179
- * ConstantScoreWeight
180
- *
181
- ***************************************************************************/
182
-
183
- extern Weight *csw_create(Query *query, Searcher *searcher);
184
-
185
- /***************************************************************************
186
- *
187
- * MatchAllWeight
188
- *
189
- ***************************************************************************/
190
-
191
- extern Weight *maw_create(Query *query, Searcher *searcher);
192
-
193
- /***************************************************************************
194
- *
195
- * SpanWeight
196
- *
197
- ***************************************************************************/
198
-
199
- extern Weight *spanw_create(Query *query, Searcher *searcher);
200
-
201
171
  /***************************************************************************
202
172
  *
203
173
  * Query
204
174
  *
205
175
  ***************************************************************************/
206
176
 
207
- enum QUERY_TYPE {
177
+ enum QUERY_TYPE
178
+ {
208
179
  TERM_QUERY,
180
+ MULTI_TERM_QUERY,
209
181
  BOOLEAN_QUERY,
210
182
  PHRASE_QUERY,
211
- MULTI_PHRASE_QUERY,
212
183
  CONSTANT_QUERY,
213
184
  FILTERED_QUERY,
214
185
  MATCH_ALL_QUERY,
@@ -225,57 +196,53 @@ enum QUERY_TYPE {
225
196
 
226
197
  struct Query
227
198
  {
228
- uchar type;
229
- int ref_cnt;
230
- void *data;
231
- float boost;
232
- Weight *weight;
233
- Query *(*rewrite)(Query *self, IndexReader *ir);
234
- void (*extract_terms)(Query *self, HashSet *terms);
235
- Similarity *(*get_similarity)(Query *self, Searcher *searcher);
236
- char *(*to_s)(Query *self, char *field);
237
- uint (*hash)(Query *self);
238
- int (*eq)(Query *self, Query *o);
239
- void (*destroy_i)(Query *self);
240
- Weight *(*create_weight_i)(Query *self, Searcher *searcher);
241
- bool destroy_all : 1;
199
+ int ref_cnt;
200
+ float boost;
201
+ Weight *weight;
202
+ Query *(*rewrite)(Query *self, IndexReader *ir);
203
+ void (*extract_terms)(Query *self, HashSet *terms);
204
+ Similarity *(*get_similarity)(Query *self, Searcher *searcher);
205
+ char *(*to_s)(Query *self, const char *field);
206
+ ulong (*hash)(Query *self);
207
+ int (*eq)(Query *self, Query *o);
208
+ void (*destroy_i)(Query *self);
209
+ Weight *(*create_weight_i)(Query *self, Searcher *searcher);
210
+ MatchVector *(*get_matchv_i)(Query *self, MatchVector *mv, TermVector *tv);
211
+ enum QUERY_TYPE type;
242
212
  };
243
213
 
244
214
  /* Internal Query Functions */
245
- extern Query *q_create();
246
215
  extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
247
216
  extern void q_destroy_i(Query *self);
248
217
  extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
249
218
 
250
-
251
219
  extern void q_deref(Query *self);
220
+ extern const char *q_get_query_name(enum QUERY_TYPE type);
252
221
  extern Weight *q_weight(Query *self, Searcher *searcher);
253
222
  extern Query *q_combine(Query **queries, int q_cnt);
254
- extern uint q_hash(Query *self);
223
+ extern ulong q_hash(Query *self);
255
224
  extern int q_eq(Query *self, Query *o);
225
+ extern Query *q_create(size_t size);
226
+ #define q_new(type) q_create(sizeof(type))
256
227
 
257
228
  /***************************************************************************
258
- *
259
229
  * TermQuery
260
- *
261
230
  ***************************************************************************/
262
231
 
263
232
  typedef struct TermQuery
264
233
  {
265
- Term *term;
234
+ Query super;
235
+ char *field;
236
+ char *term;
266
237
  } TermQuery;
267
238
 
268
- extern Query *tq_create(Term *term);
239
+ Query *tq_new(const char *field, const char *term);
269
240
 
270
241
  /***************************************************************************
271
- *
272
242
  * BooleanQuery
273
- *
274
243
  ***************************************************************************/
275
244
 
276
- /***************************************************************************
277
- * BooleanClause
278
- ***************************************************************************/
245
+ /* *** BooleanClause *** */
279
246
 
280
247
  enum BC_TYPE
281
248
  {
@@ -284,22 +251,20 @@ enum BC_TYPE
284
251
  BC_MUST_NOT
285
252
  };
286
253
 
287
- typedef struct BooleanClause {
254
+ typedef struct BooleanClause
255
+ {
288
256
  int ref_cnt;
289
257
  Query *query;
290
- Query *rewritten;
291
258
  unsigned int occur : 4;
292
259
  bool is_prohibited : 1;
293
260
  bool is_required : 1;
294
261
  } BooleanClause;
295
262
 
296
- extern BooleanClause *bc_create(Query *query, unsigned int occur);
263
+ extern BooleanClause *bc_new(Query *query, enum BC_TYPE occur);
297
264
  extern void bc_deref(BooleanClause *self);
298
- extern void bc_set_occur(BooleanClause *self, unsigned int occur);
265
+ extern void bc_set_occur(BooleanClause *self, enum BC_TYPE occur);
299
266
 
300
- /***************************************************************************
301
- * BooleanQuery
302
- ***************************************************************************/
267
+ /* *** BooleanQuery *** */
303
268
 
304
269
  #define DEFAULT_MAX_CLAUSE_COUNT 1024
305
270
  #define BOOLEAN_CLAUSES_START_CAPA 4
@@ -307,172 +272,184 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
307
272
 
308
273
  typedef struct BooleanQuery
309
274
  {
310
- bool coord_disabled;
311
- int max_clause_cnt;
312
- int clause_cnt;
313
- int clause_capa;
314
- float original_boost;
275
+ Query super;
276
+ bool coord_disabled;
277
+ int max_clause_cnt;
278
+ int clause_cnt;
279
+ int clause_capa;
280
+ float original_boost;
315
281
  BooleanClause **clauses;
316
- Similarity *similarity;
282
+ Similarity *similarity;
317
283
  } BooleanQuery;
318
284
 
319
- extern Query *bq_create(bool coord_disabled);
285
+ extern Query *bq_new(bool coord_disabled);
320
286
  extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
321
- unsigned int occur);
287
+ enum BC_TYPE occur);
288
+ extern BooleanClause *bq_add_query_nr(Query *self, Query *sub_query,
289
+ enum BC_TYPE occur);
322
290
  extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
291
+ extern BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc);
323
292
 
324
293
  /***************************************************************************
325
- *
326
294
  * PhraseQuery
327
- *
328
295
  ***************************************************************************/
329
296
 
330
297
  #define PHQ_INIT_CAPA 4
331
298
  typedef struct PhraseQuery
332
299
  {
333
- int slop;
334
- Term **terms;
335
- int *positions;
336
- int t_cnt;
337
- int t_capa;
338
- char *field;
300
+ Query super;
301
+ int slop;
302
+ char *field;
303
+ PhrasePosition *positions;
304
+ int pos_cnt;
305
+ int pos_capa;
339
306
  } PhraseQuery;
340
307
 
341
- extern Query *phq_create();
342
- extern void phq_add_term(Query *self, Term *term, int pos_inc);
308
+ extern Query *phq_new(const char *field);
309
+ extern void phq_add_term(Query *self, const char *term, int pos_inc);
310
+ extern void phq_add_term_abs(Query *self, const char *term, int position);
311
+ extern void phq_append_multi_term(Query *self, const char *term);
343
312
 
344
313
  /***************************************************************************
345
- *
346
- * MultiPhraseQuery
347
- *
314
+ * MultiTermQuery
348
315
  ***************************************************************************/
349
316
 
350
- typedef struct MultiPhraseQuery
317
+ #define MULTI_TERM_QUERY_MAX_TERMS 256
318
+ typedef struct MultiTermQuery
351
319
  {
352
- int slop;
353
- Term ***terms;
354
- int *positions;
355
- int *pt_cnt;
356
- int t_cnt;
357
- int t_capa;
358
- char *field;
359
- } MultiPhraseQuery;
320
+ Query super;
321
+ char *field;
322
+ PriorityQueue *boosted_terms;
323
+ float min_boost;
324
+ } MultiTermQuery;
360
325
 
361
- extern Query *mphq_create();
362
- extern void mphq_add_terms(Query *self, Term **ts, int t_cnt, int pos_inc);
326
+ extern void multi_tq_add_term(Query *self, const char *term);
327
+ extern void multi_tq_add_term_boost(Query *self, const char *term, float boost);
328
+ extern Query *multi_tq_new(const char *field);
329
+ extern Query *multi_tq_new_conf(const char *field, int max_terms,
330
+ float min_boost);
331
+
332
+ #define MTQMaxTerms(query) (((MTQSubQuery *)(query))->max_terms)
333
+ typedef struct MTQSubQuery
334
+ {
335
+ Query super;
336
+ int max_terms;
337
+ } MTQSubQuery;
363
338
 
364
339
  /***************************************************************************
365
- *
366
340
  * PrefixQuery
367
- *
368
341
  ***************************************************************************/
369
342
 
370
- extern Query *prefixq_create(Term *prefix);
343
+ #define PREFIX_QUERY_MAX_TERMS 256
344
+
345
+
346
+ typedef struct PrefixQuery
347
+ {
348
+ MTQSubQuery super;
349
+ char *field;
350
+ char *prefix;
351
+ } PrefixQuery;
352
+
353
+ extern Query *prefixq_new(const char *field, const char *prefix);
371
354
 
372
355
  /***************************************************************************
373
- *
374
356
  * WildCardQuery
375
- *
376
357
  ***************************************************************************/
377
358
 
378
359
  #define WILD_CHAR '?'
379
360
  #define WILD_STRING '*'
361
+ #define WILD_CARD_QUERY_MAX_TERMS 256
362
+
363
+ typedef struct WildCardQuery
364
+ {
365
+ MTQSubQuery super;
366
+ char *field;
367
+ char *pattern;
368
+ } WildCardQuery;
369
+
380
370
 
381
- extern Query *wcq_create(Term *term);
382
- extern bool wc_match(char *pattern, char *text);
371
+ extern Query *wcq_new(const char *field, const char *pattern);
372
+ extern bool wc_match(const char *pattern, const char *text);
383
373
 
384
374
  /***************************************************************************
385
- *
386
375
  * FuzzyQuery
387
- *
388
376
  ***************************************************************************/
389
377
 
390
- #define DEF_MIN_SIM 0.5
378
+ #define DEF_MIN_SIM 0.5f
391
379
  #define DEF_PRE_LEN 0
380
+ #define DEF_MAX_TERMS 256
392
381
  #define TYPICAL_LONGEST_WORD 20
393
382
 
394
383
  typedef struct FuzzyQuery
395
384
  {
396
- Term *term;
397
- char *text; /* term text after prefix */
398
- int text_len;
399
- int pre_len;
400
- float min_sim;
401
- float scale_factor;
402
- int max_distances[TYPICAL_LONGEST_WORD];
403
- int *da;
404
- int da_capa;
385
+ MTQSubQuery super;
386
+ char *field;
387
+ char *term;
388
+ const char *text; /* term text after prefix */
389
+ int text_len;
390
+ int pre_len;
391
+ float min_sim;
392
+ float scale_factor;
393
+ int max_distances[TYPICAL_LONGEST_WORD];
394
+ int *da;
405
395
  } FuzzyQuery;
406
396
 
407
- extern Query *fuzq_create(Term *term);
408
- extern Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
397
+ extern Query *fuzq_new(const char *term, const char *field);
398
+ extern Query *fuzq_new_conf(const char *field, const char *term,
399
+ float min_sim, int pre_len, int max_terms);
409
400
 
410
401
  /***************************************************************************
411
- *
412
402
  * ConstantScoreQuery
413
- *
414
403
  ***************************************************************************/
415
404
 
416
- extern Query *csq_create(Filter *filter);
405
+ typedef struct ConstantScoreQuery
406
+ {
407
+ Query super;
408
+ Filter *filter;
409
+ } ConstantScoreQuery;
410
+
411
+ extern Query *csq_new(Filter *filter);
412
+ extern Query *csq_new_nr(Filter *filter);
417
413
 
418
414
  /***************************************************************************
419
- *
420
- * FilteredQueryQuery
421
- *
415
+ * FilteredQuery
422
416
  ***************************************************************************/
423
417
 
424
418
  typedef struct FilteredQuery
425
419
  {
426
- Query *query;
420
+ Query super;
421
+ Query *query;
427
422
  Filter *filter;
428
423
  } FilteredQuery;
429
424
 
430
- extern Query *fq_create(Query *query, Filter *filter);
425
+ extern Query *fq_new(Query *query, Filter *filter);
431
426
 
432
427
  /***************************************************************************
433
- *
434
428
  * MatchAllQuery
435
- *
436
429
  ***************************************************************************/
437
430
 
438
- extern Query *maq_create();
431
+ extern Query *maq_new();
439
432
 
440
433
  /***************************************************************************
441
- *
442
434
  * RangeQuery
443
- *
444
435
  ***************************************************************************/
445
436
 
446
- typedef struct Range
447
- {
448
- char *field;
449
- char *lower_term;
450
- char *upper_term;
451
- bool include_lower : 1;
452
- bool include_upper : 1;
453
- } Range;
454
-
455
- extern Query *rq_create(const char *field, char *lower_term,
456
- char *upper_term, bool include_lower, bool include_upper);
457
- extern Query *rq_create_less(const char *field, char *upper_term,
458
- bool include_upper);
459
- extern Query *rq_create_more(const char *field, char *lower_term,
460
- bool include_lower);
437
+ extern Query *rq_new(const char *field, const char *lower_term,
438
+ const char *upper_term, bool include_lower,
439
+ bool include_upper);
440
+ extern Query *rq_new_less(const char *field, const char *upper_term,
441
+ bool include_upper);
442
+ extern Query *rq_new_more(const char *field, const char *lower_term,
443
+ bool include_lower);
461
444
 
462
445
  /***************************************************************************
463
- *
464
446
  * SpanQuery
465
- *
466
- ***************************************************************************/
467
-
468
- /***************************************************************************
469
- * SpanEnum
470
447
  ***************************************************************************/
471
448
 
449
+ /* ** SpanEnum ** */
472
450
  typedef struct SpanEnum SpanEnum;
473
451
  struct SpanEnum
474
452
  {
475
- void *data;
476
453
  Query *query;
477
454
  bool (*next)(SpanEnum *self);
478
455
  bool (*skip_to)(SpanEnum *self, int target_doc);
@@ -483,107 +460,26 @@ struct SpanEnum
483
460
  void (*destroy)(SpanEnum *self);
484
461
  };
485
462
 
486
- /***************************************************************************
487
- * SpanTermEnum
488
- ***************************************************************************/
489
-
490
- typedef struct SpanTermEnum SpanTermEnum;
491
- struct SpanTermEnum
492
- {
493
- TermDocEnum *positions;
494
- int position;
495
- int doc;
496
- int count;
497
- int freq;
498
- };
499
-
500
- extern SpanEnum *spante_create(Query *query, IndexReader *ir);
501
-
502
- /***************************************************************************
503
- * SpanFirstEnum
504
- ***************************************************************************/
505
-
506
- extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
507
-
508
- /***************************************************************************
509
- * SpanOrEnum
510
- ***************************************************************************/
511
-
512
- typedef struct SpanOrEnum
513
- {
514
- PriorityQueue *queue;
515
- SpanEnum **span_enums;
516
- int s_cnt;
517
- bool first_time;
518
- } SpanOrEnum;
519
-
520
- extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
521
-
522
- /***************************************************************************
523
- * SpanEnumCell
524
- ***************************************************************************/
525
-
526
- typedef struct SpanEnumCell
527
- {
528
- SpanEnum *parent;
529
- SpanEnum *se;
530
- int index;
531
- int length;
532
- } SpanEnumCell;
533
-
534
- extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
535
-
536
- /***************************************************************************
537
- * SpanNearEnum
538
- ***************************************************************************/
539
-
540
- typedef struct SpanNearEnum
541
- {
542
- SpanEnum **span_enums;
543
- int s_cnt;
544
- int slop;
545
- int current;
546
- bool first_time : 1;
547
- bool in_order : 1;
548
- int doc;
549
- int start;
550
- int end;
551
- } SpanNearEnum;
552
-
553
- extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
554
-
555
- /***************************************************************************
556
- * SpanNotEnum
557
- ***************************************************************************/
558
-
559
- typedef struct SpanNotEnum
463
+ /* ** SpanQuery ** */
464
+ typedef struct SpanQuery
560
465
  {
561
- SpanEnum *inc;
562
- SpanEnum *exc;
563
- bool more_inc : 1;
564
- bool more_exc : 1;
565
- } SpanNotEnum;
566
-
567
- extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
466
+ Query super;
467
+ char *field;
468
+ SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
469
+ HashSet *(*get_terms)(Query *self);
470
+ } SpanQuery;
568
471
 
569
472
  /***************************************************************************
570
- * SpanQuery
473
+ * SpanTermQuery
571
474
  ***************************************************************************/
572
475
 
573
- typedef struct SpanQuery SpanQuery;
574
- struct SpanQuery
476
+ typedef struct SpanTermQuery
575
477
  {
576
- void *data;
577
- char *field;
578
- SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
579
- HashSet *(*get_terms)(Query *self);
580
- };
478
+ SpanQuery super;
479
+ char *term;
480
+ } SpanTermQuery;
481
+ extern Query *spantq_new(const char *field, const char *term);
581
482
 
582
- /***************************************************************************
583
- * SpanTermQuery
584
- ***************************************************************************/
585
-
586
- extern Query *spantq_create(Term *term);
587
483
 
588
484
  /***************************************************************************
589
485
  * SpanFirstQuery
@@ -591,11 +487,13 @@ extern Query *spantq_create(Term *term);
591
487
 
592
488
  typedef struct SpanFirstQuery
593
489
  {
594
- int end;
595
- Query *match;
490
+ SpanQuery super;
491
+ int end;
492
+ Query *match;
596
493
  } SpanFirstQuery;
597
494
 
598
- extern Query *spanfq_create(Query *match, int end);
495
+ extern Query *spanfq_new(Query *match, int end);
496
+ extern Query *spanfq_new_nr(Query *match, int end);
599
497
 
600
498
  /***************************************************************************
601
499
  * SpanOrQuery
@@ -603,11 +501,15 @@ extern Query *spanfq_create(Query *match, int end);
603
501
 
604
502
  typedef struct SpanOrQuery
605
503
  {
606
- Query **clauses;
607
- int c_cnt;
504
+ SpanQuery super;
505
+ Query **clauses;
506
+ int c_cnt;
507
+ int c_capa;
608
508
  } SpanOrQuery;
609
509
 
610
- extern Query *spanoq_create(Query **clauses, int c_cnt);
510
+ extern Query *spanoq_new();
511
+ extern Query *spanoq_add_clause(Query *self, Query *clause);
512
+ extern Query *spanoq_add_clause_nr(Query *self, Query *clause);
611
513
 
612
514
  /***************************************************************************
613
515
  * SpanNearQuery
@@ -615,15 +517,17 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
615
517
 
616
518
  typedef struct SpanNearQuery
617
519
  {
618
- Query **clauses;
619
- int c_cnt;
620
- int slop;
621
- bool in_order;
520
+ SpanQuery super;
521
+ Query **clauses;
522
+ int c_cnt;
523
+ int c_capa;
524
+ int slop;
525
+ bool in_order : 1;
622
526
  } SpanNearQuery;
623
527
 
624
- extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
625
- bool in_order);
626
-
528
+ extern Query *spannq_new(int slop, bool in_order);
529
+ extern Query *spannq_add_clause(Query *self, Query *clause);
530
+ extern Query *spannq_add_clause_nr(Query *self, Query *clause);
627
531
 
628
532
  /***************************************************************************
629
533
  * SpanNotQuery
@@ -631,11 +535,15 @@ extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
631
535
 
632
536
  typedef struct SpanNotQuery
633
537
  {
634
- Query *inc;
635
- Query *exc;
538
+ SpanQuery super;
539
+ Query *inc;
540
+ Query *exc;
636
541
  } SpanNotQuery;
637
542
 
638
- extern Query *spanxq_create(Query *inc, Query *exc);
543
+ extern Query *spanxq_new(Query *inc, Query *exc);
544
+ extern Query *spanxq_new_nr(Query *inc, Query *exc);
545
+
546
+
639
547
 
640
548
  /***************************************************************************
641
549
  *
@@ -643,13 +551,15 @@ extern Query *spanxq_create(Query *inc, Query *exc);
643
551
  *
644
552
  ***************************************************************************/
645
553
 
646
- #define SCORER_NULLIFY(mscorer) mscorer->destroy(mscorer); mscorer = NULL
554
+ #define SCORER_NULLIFY(mscorer) do {\
555
+ (mscorer)->destroy(mscorer);\
556
+ (mscorer) = NULL;\
557
+ } while (0)
647
558
 
648
559
  struct Scorer
649
560
  {
650
- void *data;
651
561
  Similarity *similarity;
652
- int doc;
562
+ int doc;
653
563
  float (*score)(Scorer *self);
654
564
  bool (*next)(Scorer *self);
655
565
  bool (*skip_to)(Scorer *self, int doc_num);
@@ -657,300 +567,80 @@ struct Scorer
657
567
  void (*destroy)(Scorer *self);
658
568
  };
659
569
 
570
+ #define scorer_new(type, similarity) scorer_create(sizeof(type), similarity)
660
571
  /* Internal Scorer Function */
661
572
  extern void scorer_destroy_i(Scorer *self);
662
-
663
- extern Scorer *scorer_create(Similarity *similarity);
573
+ extern Scorer *scorer_create(size_t size, Similarity *similarity);
664
574
  extern bool scorer_less_than(void *p1, void *p2);
665
- extern bool scorer_doc_less_than(void *p1, void *p2);
575
+ extern bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2);
666
576
  extern int scorer_doc_cmp(const void *p1, const void *p2);
667
577
 
668
- /***************************************************************************
669
- *
670
- * TermScorer
671
- *
672
- ***************************************************************************/
673
-
674
- #define SCORE_CACHE_SIZE 32
675
- #define TDE_READ_SIZE 32
676
-
677
- typedef struct TermScorer
678
- {
679
- int docs[TDE_READ_SIZE];
680
- int freqs[TDE_READ_SIZE];
681
- int pointer;
682
- int pointer_max;
683
- float score_cache[SCORE_CACHE_SIZE];
684
- Weight *weight;
685
- TermDocEnum *tde;
686
- uchar *norms;
687
- float weight_value;
688
- } TermScorer;
689
-
690
- extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
691
-
692
- /***************************************************************************
693
- *
694
- * BooleanScorer
695
- *
696
- ***************************************************************************/
697
-
698
- /***************************************************************************
699
- * Coordinator
700
- ***************************************************************************/
701
-
702
- typedef struct Coordinator
703
- {
704
- int max_coord;
705
- float *coord_factors;
706
- Similarity *similarity;
707
- int num_matches;
708
- } Coordinator;
709
-
710
- /***************************************************************************
711
- * DisjunctionSumScorer
712
- ***************************************************************************/
713
-
714
- typedef struct DisjunctionSumScorer
715
- {
716
- float cum_score;
717
- int num_matches;
718
- int min_num_matches;
719
- Scorer **sub_scorers;
720
- int ss_cnt;
721
- PriorityQueue *scorer_queue;
722
- Coordinator *coordinator;
723
- } DisjunctionSumScorer;
724
-
725
- /***************************************************************************
726
- * ConjunctionScorer
727
- ***************************************************************************/
728
-
729
- typedef struct ConjunctionScorer
730
- {
731
- bool first_time : 1;
732
- bool more : 1;
733
- float coord;
734
- int ss_cnt;
735
- int ss_capa;
736
- Scorer **sub_scorers;
737
- int first;
738
- int last;
739
- Coordinator *coordinator;
740
- int last_scored_doc;
741
- } ConjunctionScorer;
742
-
743
- /***************************************************************************
744
- * SingleMatchScorer
745
- ***************************************************************************/
746
-
747
- typedef struct SingleMatchScorer
748
- {
749
- Coordinator *coordinator;
750
- Scorer *scorer;
751
- } SingleMatchScorer;
752
-
753
- /***************************************************************************
754
- * ReqOptSumScorer
755
- ***************************************************************************/
756
-
757
- typedef struct ReqOptSumScorer
758
- {
759
- Scorer *req_scorer;
760
- Scorer *opt_scorer;
761
- bool first_time_opt;
762
- } ReqOptSumScorer;
763
-
764
- /***************************************************************************
765
- * ReqExclScorer
766
- ***************************************************************************/
767
-
768
- typedef struct ReqExclScorer
769
- {
770
- Scorer *req_scorer;
771
- Scorer *excl_scorer;
772
- bool first_time;
773
- } ReqExclScorer;
774
-
775
- /***************************************************************************
776
- * BooleanScorer
777
- ***************************************************************************/
778
-
779
- typedef struct BooleanScorer
780
- {
781
- Scorer **required_scorers;
782
- int rs_cnt;
783
- int rs_capa;
784
- Scorer **optional_scorers;
785
- int os_cnt;
786
- int os_capa;
787
- Scorer **prohibited_scorers;
788
- int ps_cnt;
789
- int ps_capa;
790
- Scorer *counting_sum_scorer;
791
- Coordinator *coordinator;
792
- } BooleanScorer;
793
-
794
- extern Scorer *bsc_create(Similarity *similarity);
795
- extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
796
-
797
- /***************************************************************************
798
- *
799
- * PhraseScorer
800
- *
801
- ***************************************************************************/
802
-
803
- /***************************************************************************
804
- * PhrasePosition
805
- ***************************************************************************/
806
- typedef struct PhrasePosition
807
- {
808
- TermDocEnum *tpe;
809
- int offset;
810
- int count;
811
- int doc;
812
- int position;
813
- } PhrasePosition;
814
-
815
- extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
816
-
817
- /***************************************************************************
818
- * PhraseScorer
819
- ***************************************************************************/
820
-
821
- typedef struct PhraseScorer
822
- {
823
- float freq;
824
- uchar *norms;
825
- float value;
826
- Weight *weight;
827
- bool first_time : 1;
828
- bool more : 1;
829
- int pp_first;
830
- int pp_last;
831
- int pp_cnt;
832
- PhrasePosition **phrase_pos;
833
- float (*phrase_freq)(Scorer *self);
834
- int slop;
835
- } PhraseScorer;
836
-
837
- extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
838
- int *positions, int t_cnt, Similarity *similarity, uchar *norms);
839
-
840
- /***************************************************************************
841
- * ExactPhraseScorer
842
- ***************************************************************************/
843
-
844
- extern Scorer *exact_phrase_scorer_create(Weight *weight,
845
- TermDocEnum **term_pos_enum, int *positions, int t_cnt,
846
- Similarity *similarity, uchar *norms);
847
-
848
- /***************************************************************************
849
- * SloppyPhraseScorer
850
- ***************************************************************************/
851
-
852
- extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
853
- TermDocEnum **term_pos_enum, int *positions, int t_cnt,
854
- Similarity *similarity, int slop, uchar *norms);
855
-
856
- /***************************************************************************
857
- *
858
- * ConstantScoreScorer
859
- *
860
- ***************************************************************************/
861
-
862
- typedef struct ConstantScoreScorer
863
- {
864
- BitVector *bv;
865
- float score;
866
- } ConstantScoreScorer;
867
-
868
- extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
869
-
870
-
871
- /***************************************************************************
872
- *
873
- * MatchAllScorer
874
- *
875
- ***************************************************************************/
876
-
877
- typedef struct MatchAllScorer
878
- {
879
- IndexReader *ir;
880
- int max_doc;
881
- float score;
882
- } MatchAllScorer;
883
-
884
- extern Scorer *masc_create(Weight *weight, IndexReader *ir);
885
-
886
-
887
- /***************************************************************************
888
- *
889
- * SpanScorer
890
- *
891
- ***************************************************************************/
892
-
893
- typedef struct SpanScorer
894
- {
895
- bool first_time : 1;
896
- bool more : 1;
897
- IndexReader *ir;
898
- SpanEnum *spans;
899
- Similarity *sim;
900
- uchar *norms;
901
- Weight *weight;
902
- float value;
903
- float freq;
904
- } SpanScorer;
905
-
906
- extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
907
-
908
578
  /***************************************************************************
909
579
  *
910
580
  * Sort
911
581
  *
912
582
  ***************************************************************************/
913
583
 
914
- enum SORT_TYPE {
584
+ enum SORT_TYPE
585
+ {
915
586
  SORT_TYPE_SCORE,
916
587
  SORT_TYPE_DOC,
588
+ SORT_TYPE_BYTE,
917
589
  SORT_TYPE_INTEGER,
918
590
  SORT_TYPE_FLOAT,
919
591
  SORT_TYPE_STRING,
920
592
  SORT_TYPE_AUTO
921
593
  };
922
594
 
595
+ /***************************************************************************
596
+ * Comparable
597
+ ***************************************************************************/
598
+
599
+ typedef struct Comparable
600
+ {
601
+ int type;
602
+ union {
603
+ int i;
604
+ float f;
605
+ char *s;
606
+ void *p;
607
+ } val;
608
+ bool reverse : 1;
609
+ } Comparable;
610
+
923
611
  /***************************************************************************
924
612
  * SortField
925
613
  ***************************************************************************/
926
614
 
927
615
  typedef struct SortField
928
616
  {
929
- mutex_t mutex;
930
- char *field;
931
- int type;
932
- bool reverse : 1;
933
- void *index;
934
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
935
- void *(*create_index)(int size);
936
- void (*destroy_index)(void *p);
937
- void (*handle_term)(void *index, TermDocEnum *tde, char *text);
617
+ mutex_t mutex;
618
+ char *field;
619
+ enum SORT_TYPE type;
620
+ bool reverse : 1;
621
+ void *index;
622
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
623
+ void (*get_val)(void *index_ptr, Hit *hit1, Comparable *comparable);
624
+ void *(*create_index)(int size);
625
+ void (*destroy_index)(void *p);
626
+ void (*handle_term)(void *index, TermDocEnum *tde, char *text);
938
627
  } SortField;
939
628
 
940
- extern SortField *sort_field_create(char *field, int type, bool reverse);
941
- extern SortField *sort_field_score_create(bool reverse);
942
- extern SortField *sort_field_doc_create(bool reverse);
943
- extern SortField *sort_field_int_create(char *field, bool reverse);
944
- extern SortField *sort_field_float_create(char *field, bool reverse);
945
- extern SortField *sort_field_string_create(char *field, bool reverse);
946
- extern SortField *sort_field_auto_create(char *field, bool reverse);
629
+ extern SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse);
630
+ extern SortField *sort_field_score_new(bool reverse);
631
+ extern SortField *sort_field_doc_new(bool reverse);
632
+ extern SortField *sort_field_int_new(char *field, bool reverse);
633
+ extern SortField *sort_field_byte_new(char *field, bool reverse);
634
+ extern SortField *sort_field_float_new(char *field, bool reverse);
635
+ extern SortField *sort_field_string_new(char *field, bool reverse);
636
+ extern SortField *sort_field_auto_new(char *field, bool reverse);
947
637
  extern void sort_field_destroy(void *p);
948
638
  extern char *sort_field_to_s(SortField *self);
949
639
 
950
- extern SortField SORT_FIELD_SCORE;
951
- extern SortField SORT_FIELD_SCORE_REV;
952
- extern SortField SORT_FIELD_DOC;
953
- extern SortField SORT_FIELD_DOC_REV;
640
+ extern const SortField SORT_FIELD_SCORE;
641
+ extern const SortField SORT_FIELD_SCORE_REV;
642
+ extern const SortField SORT_FIELD_DOC;
643
+ extern const SortField SORT_FIELD_DOC_REV;
954
644
 
955
645
  /***************************************************************************
956
646
  * Sort
@@ -959,12 +649,13 @@ extern SortField SORT_FIELD_DOC_REV;
959
649
  typedef struct Sort
960
650
  {
961
651
  SortField **sort_fields;
962
- int sf_cnt;
963
- int sf_capa;
652
+ int size;
653
+ int capa;
654
+ int start;
964
655
  bool destroy_all : 1;
965
656
  } Sort;
966
657
 
967
- extern Sort *sort_create();
658
+ extern Sort *sort_new();
968
659
  extern void sort_destroy(void *p);
969
660
  extern void sort_add_sort_field(Sort *self, SortField *sf);
970
661
  extern void sort_clear(Sort *self);
@@ -978,7 +669,27 @@ extern Hit *fshq_pq_pop(PriorityQueue *pq);
978
669
  extern void fshq_pq_down(PriorityQueue *pq);
979
670
  extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
980
671
  extern void fshq_pq_destroy(PriorityQueue *pq);
981
- extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
672
+ extern PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir);
673
+ extern Hit *fshq_pq_pop_fd(PriorityQueue *pq);
674
+
675
+ /***************************************************************************
676
+ * FieldDoc
677
+ ***************************************************************************/
678
+
679
+ typedef struct FieldDoc
680
+ {
681
+ Hit hit;
682
+ int size;
683
+ Comparable comparables[];
684
+ } FieldDoc;
685
+
686
+ extern void fd_destroy(FieldDoc *fd);
687
+
688
+ /***************************************************************************
689
+ * FieldDocSortedHitQueue
690
+ ***************************************************************************/
691
+
692
+ extern bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2);
982
693
 
983
694
  /***************************************************************************
984
695
  *
@@ -986,47 +697,91 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
986
697
  *
987
698
  ***************************************************************************/
988
699
 
989
- struct Searcher {
990
- void *data;
991
- IndexReader *ir;
700
+ typedef bool (*filter_ft)(int doc_num, float score, Searcher *self);
701
+
702
+ struct Searcher
703
+ {
992
704
  Similarity *similarity;
993
- bool close_ir : 1;
994
- int (*doc_freq)(Searcher *self, Term *term);
995
- int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
705
+ int (*doc_freq)(Searcher *self, const char *field,
706
+ const char *term);
996
707
  Document *(*get_doc)(Searcher *self, int doc_num);
708
+ LazyDoc *(*get_lazy_doc)(Searcher *self, int doc_num);
997
709
  int (*max_doc)(Searcher *self);
998
710
  Weight *(*create_weight)(Searcher *self, Query *query);
999
711
  TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
1000
- int num_docs, Filter *filter, Sort *sort);
712
+ int num_docs, Filter *filter, Sort *sort,
713
+ filter_ft filter_func,
714
+ bool load_fields);
715
+ TopDocs *(*search_w)(Searcher *self, Weight *weight, int first_doc,
716
+ int num_docs, Filter *filter, Sort *sort,
717
+ filter_ft filter_func,
718
+ bool load_fields);
1001
719
  void (*search_each)(Searcher *self, Query *query, Filter *filter,
1002
- void (*fn)(Searcher *, int, float, void *), void *arg);
720
+ filter_ft filter_func,
721
+ void (*fn)(Searcher *, int, float, void *),
722
+ void *arg);
1003
723
  void (*search_each_w)(Searcher *self, Weight *weight,
1004
- Filter *filter, void (*fn)(Searcher *, int, float, void *),
724
+ Filter *filter,
725
+ filter_ft filter_func,
726
+ void (*fn)(Searcher *, int, float, void *),
1005
727
  void *arg);
1006
728
  Query *(*rewrite)(Searcher *self, Query *original);
1007
729
  Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
1008
730
  Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
731
+ TermVector *(*get_term_vector)(Searcher *self, const int doc_num,
732
+ const char *field);
1009
733
  Similarity *(*get_similarity)(Searcher *self);
1010
734
  void (*close)(Searcher *self);
735
+ void *arg; /* used to pass values to Searcher functions */
1011
736
  };
1012
737
 
1013
- #define sea_doc_freq(s, t) s->doc_freq(s, t)
1014
- #define sea_doc_freqs(s, t, c) s->doc_freqs(s, t, c)
1015
- #define sea_get_doc(s, dn) s->get_doc(s, dn)
1016
- #define sea_max_doc(s) s->max_doc(s)
1017
- #define sea_search(s, q, fd, nd, filt, sort)\
1018
- s->search(s, q, fd, nd, filt, sort)
1019
- #define sea_search_each(s, q, filt, fn, arg)\
1020
- s->search_each(s, q, filt, fn, arg)
1021
- #define sea_search_each_w(s, q, filt, fn, arg)\
1022
- s->search_each_w(s, q, filt, fn, arg)
1023
- #define sea_rewrite(s, q) s->rewrite(s, q)
1024
- #define sea_explain(s, q, dn) s->explain(s, q, dn)
1025
- #define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
1026
- #define sea_get_similarity(s) s->get_similarity(s)
1027
- #define sea_close(s) s->close(s)
1028
-
1029
- extern Searcher *sea_create(IndexReader *ir);
738
+ #define searcher_doc_freq(s, t) s->doc_freq(s, t)
739
+ #define searcher_get_doc(s, dn) s->get_doc(s, dn)
740
+ #define searcher_get_lazy_doc(s, dn) s->get_lazy_doc(s, dn)
741
+ #define searcher_max_doc(s) s->max_doc(s)
742
+ #define searcher_rewrite(s, q) s->rewrite(s, q)
743
+ #define searcher_explain(s, q, dn) s->explain(s, q, dn)
744
+ #define searcher_explain_w(s, q, dn) s->explain_w(s, q, dn)
745
+ #define searcher_get_similarity(s) s->get_similarity(s)
746
+ #define searcher_close(s) s->close(s)
747
+ #define searcher_search(s, q, fd, nd, filt, sort, ff)\
748
+ s->search(s, q, fd, nd, filt, sort, ff, false)
749
+ #define searcher_search_fd(s, q, fd, nd, filt, sort, ff)\
750
+ s->search(s, q, fd, nd, filt, sort, ff, true)
751
+ #define searcher_search_each(s, q, filt, ff, fn, arg)\
752
+ s->search_each(s, q, filt, ff, fn, arg)
753
+ #define searcher_search_each_w(s, q, filt, ff, fn, arg)\
754
+ s->search_each_w(s, q, filt, ff, fn, arg)
755
+
756
+
757
+ extern MatchVector *searcher_get_match_vector(Searcher *self,
758
+ Query *query,
759
+ const int doc_num,
760
+ const char *field);
761
+ extern char **searcher_highlight(Searcher *self,
762
+ Query *query,
763
+ const int doc_num,
764
+ const char *field,
765
+ const int excerpt_len,
766
+ const int num_excerpts,
767
+ const char *pre_tag,
768
+ const char *post_tag,
769
+ const char *ellipsis);
770
+
771
+ /***************************************************************************
772
+ *
773
+ * IndexSearcher
774
+ *
775
+ ***************************************************************************/
776
+
777
+ typedef struct IndexSearcher {
778
+ Searcher super;
779
+ IndexReader *ir;
780
+ bool close_ir : 1;
781
+ } IndexSearcher;
782
+
783
+ extern Searcher *isea_new(IndexReader *ir);
784
+ extern int isea_doc_freq(Searcher *self, const char *field, const char *term);
1030
785
 
1031
786
  /***************************************************************************
1032
787
  *
@@ -1036,15 +791,15 @@ extern Searcher *sea_create(IndexReader *ir);
1036
791
 
1037
792
  typedef struct MultiSearcher
1038
793
  {
1039
- int s_cnt;
1040
- Searcher **searchers;
1041
- int *starts;
1042
- int max_doc;
1043
- bool close_subs : 1;
794
+ Searcher super;
795
+ int s_cnt;
796
+ Searcher **searchers;
797
+ int *starts;
798
+ int max_doc;
799
+ bool close_subs : 1;
1044
800
  } MultiSearcher;
1045
801
 
1046
- extern Searcher *msea_create(Searcher **searchers, int s_cnt,
1047
- bool close_subs);
802
+ extern Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs);
1048
803
 
1049
804
  /***************************************************************************
1050
805
  *
@@ -1052,93 +807,39 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
1052
807
  *
1053
808
  ***************************************************************************/
1054
809
 
1055
- #define CONC_WORDS 2
810
+ #define QP_CONC_WORDS 2
811
+ #define QP_MAX_CLAUSES 512
1056
812
 
1057
813
  typedef struct QParser
1058
814
  {
1059
815
  mutex_t mutex;
1060
- bool or_default : 1;
1061
- bool wild_lower : 1;
1062
- bool clean_str : 1;
1063
- bool handle_parse_errors : 1;
1064
- bool allow_any_fields : 1;
1065
- bool close_def_fields : 1;
1066
816
  int def_slop;
817
+ int max_clauses;
818
+ int phq_pos_inc;
1067
819
  char *qstr;
1068
820
  char *qstrp;
1069
- char buf[CONC_WORDS][MAX_WORD_SIZE];
821
+ char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
1070
822
  int buf_index;
823
+ HashTable *field_cache;
1071
824
  HashSet *fields;
1072
825
  HashSet *fields_buf;
1073
826
  HashSet *def_fields;
1074
827
  HashSet *all_fields;
1075
828
  Analyzer *analyzer;
829
+ HashTable *ts_cache;
1076
830
  Query *result;
831
+ bool or_default : 1;
832
+ bool wild_lower : 1;
833
+ bool clean_str : 1;
834
+ bool handle_parse_errors : 1;
835
+ bool allow_any_fields : 1;
836
+ bool close_def_fields : 1;
1077
837
  } QParser;
1078
838
 
1079
- extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
1080
- Analyzer *analyzer);
839
+ extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
840
+ Analyzer *analyzer);
1081
841
  extern void qp_destroy(QParser *self);
1082
842
  extern Query *qp_parse(QParser *self, char *qstr);
1083
843
  extern char *qp_clean_str(char *str);
1084
844
 
1085
- /***************************************************************************
1086
- *
1087
- * Index
1088
- *
1089
- ***************************************************************************/
1090
-
1091
- typedef struct Index
1092
- {
1093
- mutex_t mutex;
1094
- Store *store;
1095
- Analyzer *analyzer;
1096
- IndexReader *ir;
1097
- IndexWriter *iw;
1098
- Searcher *sea;
1099
- QParser *qp;
1100
- HashSet *key;
1101
- char *id_field;
1102
- char *def_field;
1103
- /* for IndexWriter */
1104
- bool use_compound_file : 1;
1105
- bool auto_flush : 1;
1106
- bool has_writes : 1;
1107
- bool check_latest : 1;
1108
- } Index;
1109
-
1110
- extern Index *index_create(Store *store, Analyzer *analyzer,
1111
- HashSet *def_fields, bool create);
1112
- extern void index_destroy(Index *self);
1113
- extern void index_flush(Index *self);
1114
- extern int index_size(Index *self);
1115
- extern void index_optimize(Index *self);
1116
- extern bool index_has_del(Index *self);
1117
- extern bool index_is_deleted(Index *self, int doc_num);
1118
- extern void index_add_doc(Index *self, Document *doc);
1119
- extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
1120
- extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
1121
- extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
1122
- extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
1123
- int num_docs, Filter *filter, Sort *sort);
1124
- extern Query *index_get_query(Index *self, char *qstr);
1125
- extern Document *index_get_doc(Index *self, int doc_num);
1126
- extern Document *index_get_doc_ts(Index *self, int doc_num);
1127
- extern Document *index_get_doc_id(Index *self, char *id);
1128
- extern Document *index_get_doc_term(Index *self, Term *term);
1129
- extern void index_delete(Index *self, int doc_num);
1130
- extern void index_delete_term(Index *self, Term *term);
1131
- extern void index_delete_id(Index *self, char *id);
1132
- extern void index_delete_query(Index *self, Query *q, Filter *f);
1133
- extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
1134
- extern int index_term_id(Index *self, Term *term);
1135
- extern Explanation *index_explain(Index *self, Query *q, int doc_num);
1136
- extern void index_auto_flush_ir(Index *self);
1137
- extern void index_auto_flush_iw(Index *self);
1138
-
1139
- extern inline void ensure_searcher_open(Index *self);
1140
- extern inline void ensure_reader_open(Index *self);
1141
- extern inline void ensure_writer_open(Index *self);
1142
-
1143
845
  #endif
1144
-