ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/search.h CHANGED
@@ -9,9 +9,6 @@ typedef struct Scorer Scorer;
9
9
  #include "bitvector.h"
10
10
  #include "similarity.h"
11
11
 
12
- #define term_set_create() \
13
- hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
14
-
15
12
  /***************************************************************************
16
13
  *
17
14
  * Explanation
@@ -24,15 +21,45 @@ typedef struct Explanation
24
21
  float value;
25
22
  char *description;
26
23
  struct Explanation **details;
27
- int dcnt;
28
- int dcapa;
29
24
  } Explanation;
25
+
26
+ extern Explanation *expl_new(float value, const char *description, ...);
27
+ extern void expl_destroy(Explanation *expl);
28
+ extern Explanation *expl_add_detail(Explanation *expl, Explanation *detail);
29
+ extern char *expl_to_s_depth(Explanation *expl, int depth);
30
+ extern char *expl_to_html(Explanation *expl);
30
31
 
31
- extern Explanation *expl_create(float value, char *description);
32
- extern void expl_destoy(void *p);
33
- extern Explanation *expl_add_detail(Explanation *self, Explanation *detail);
34
- extern char *expl_to_s(Explanation *self, int depth);
35
- extern char *expl_to_html(Explanation *self);
32
+ #define expl_to_s(expl) expl_to_s_depth(expl, 0)
33
+
34
+ /***************************************************************************
35
+ *
36
+ * Highlighter
37
+ *
38
+ ***************************************************************************/
39
+
40
+ typedef struct MatchRange
41
+ {
42
+ int start;
43
+ int end;
44
+ int start_offset;
45
+ int end_offset;
46
+ double score;
47
+ } MatchRange;
48
+
49
+ #define MATCH_VECTOR_INIT_CAPA 8
50
+ typedef struct MatchVector
51
+ {
52
+ int size;
53
+ int capa;
54
+ MatchRange *matches;
55
+ } MatchVector;
56
+
57
+ extern MatchVector *matchv_new();
58
+ extern MatchVector *matchv_add(MatchVector *mp, int start, int end);
59
+ extern MatchVector *matchv_sort(MatchVector *self);
60
+ extern void matchv_destroy(MatchVector *self);
61
+ extern MatchVector *matchv_compact(MatchVector *self);
62
+ extern MatchVector *matchv_compact_with_breaks(MatchVector *self);
36
63
 
37
64
  /***************************************************************************
38
65
  *
@@ -46,8 +73,6 @@ typedef struct Hit
46
73
  float score;
47
74
  } Hit;
48
75
 
49
- extern bool hit_less_than(void *p1, void *p2);
50
-
51
76
  /***************************************************************************
52
77
  *
53
78
  * TopDocs
@@ -59,9 +84,10 @@ typedef struct TopDocs
59
84
  int total_hits;
60
85
  int size;
61
86
  Hit **hits;
87
+ float max_score;
62
88
  } TopDocs;
63
89
 
64
- extern TopDocs *td_create(int total_hits, int size, Hit **hits);
90
+ extern TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score);
65
91
  extern void td_destroy(TopDocs *td);
66
92
  extern char *td_to_s(TopDocs *td);
67
93
 
@@ -73,22 +99,23 @@ extern char *td_to_s(TopDocs *td);
73
99
 
74
100
  typedef struct Filter
75
101
  {
76
- void *data;
77
- char *name;
78
- HshTable *cache;
79
- BitVector *(*get_bv)(struct Filter *self, IndexReader *ir);
80
- char *(*to_s)(struct Filter *self);
81
- uint (*hash)(struct Filter *self);
82
- int (*eq)(struct Filter *self, struct Filter *o);
83
- void (*destroy)(struct Filter *self);
102
+ char *name;
103
+ HashTable *cache;
104
+ BitVector *(*get_bv_i)(struct Filter *self, IndexReader *ir);
105
+ char *(*to_s)(struct Filter *self);
106
+ ulong (*hash)(struct Filter *self);
107
+ int (*eq)(struct Filter *self, struct Filter *o);
108
+ void (*destroy_i)(struct Filter *self);
109
+ int ref_cnt;
84
110
  } Filter;
85
111
 
86
- extern Filter *filt_create(char *name);
87
- extern char *filt_to_s_i(Filter *self);
88
- extern BitVector *filt_get_bv(Filter *self, IndexReader *ir);
89
- extern void filt_destroy(Filter *self);
90
- extern uint filt_hash(Filter *self);
91
- extern int filt_eq(Filter *self, Filter *o);
112
+ #define filt_new(type) filt_create(sizeof(type), #type)
113
+ extern Filter *filt_create(size_t size, const char *name);
114
+ extern BitVector *filt_get_bv(Filter *filt, IndexReader *ir);
115
+ extern void filt_destroy_i(Filter *filt);
116
+ extern void filt_deref(Filter *filt);
117
+ extern ulong filt_hash(Filter *filt);
118
+ extern int filt_eq(Filter *filt, Filter *o);
92
119
 
93
120
  /***************************************************************************
94
121
  *
@@ -96,8 +123,9 @@ extern int filt_eq(Filter *self, Filter *o);
96
123
  *
97
124
  ***************************************************************************/
98
125
 
99
- extern Filter *rfilt_create(const char *field, char *lower_term,
100
- char *upper_term, bool include_lower, bool include_upper);
126
+ extern Filter *rfilt_new(const char *field,
127
+ const char *lower_term, const char *upper_term,
128
+ bool include_lower, bool include_upper);
101
129
 
102
130
  /***************************************************************************
103
131
  *
@@ -105,12 +133,8 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
105
133
  *
106
134
  ***************************************************************************/
107
135
 
108
- typedef struct QueryFilter
109
- {
110
- Query *query;
111
- } QueryFilter;
112
-
113
- extern Filter *qfilt_create(Query *query);
136
+ extern Filter *qfilt_new(Query *query);
137
+ extern Filter *qfilt_new_nr(Query *query);
114
138
 
115
139
  /***************************************************************************
116
140
  *
@@ -120,12 +144,11 @@ extern Filter *qfilt_create(Query *query);
120
144
 
121
145
  struct Weight
122
146
  {
123
- void *data;
124
- float value;
125
- float qweight;
126
- float qnorm;
127
- float idf;
128
- Query *query;
147
+ float value;
148
+ float qweight;
149
+ float qnorm;
150
+ float idf;
151
+ Query *query;
129
152
  Similarity *similarity;
130
153
  Query *(*get_query)(Weight *self);
131
154
  float (*get_value)(Weight *self);
@@ -137,78 +160,26 @@ struct Weight
137
160
  void (*destroy)(Weight *self);
138
161
  };
139
162
 
140
- extern Weight *w_create(Query *query);
163
+ #define w_new(type, query) w_create(sizeof(type), query)
164
+ extern Weight *w_create(size_t size, Query *query);
141
165
  extern void w_destroy(Weight *self);
142
-
143
166
  extern Query *w_get_query(Weight *self);
144
167
  extern float w_get_value(Weight *self);
145
168
  extern float w_sum_of_squared_weights(Weight *self);
146
169
  extern void w_normalize(Weight *self, float normalization_factor);
147
170
 
148
- /***************************************************************************
149
- *
150
- * TermWeight
151
- *
152
- ***************************************************************************/
153
-
154
- extern Weight *tw_create(Query *query, Searcher *searcher);
155
-
156
- /***************************************************************************
157
- *
158
- * BooleanWeight
159
- *
160
- ***************************************************************************/
161
-
162
- typedef struct BooleanWeight {
163
- Weight **weights;
164
- int w_cnt;
165
- } BooleanWeight;
166
-
167
- extern Weight *bw_create(Query *query, Searcher *searcher);
168
-
169
- /***************************************************************************
170
- *
171
- * PhraseWeight
172
- *
173
- ***************************************************************************/
174
-
175
- extern Weight *phw_create(Query *query, Searcher *searcher);
176
-
177
- /***************************************************************************
178
- *
179
- * ConstantScoreWeight
180
- *
181
- ***************************************************************************/
182
-
183
- extern Weight *csw_create(Query *query, Searcher *searcher);
184
-
185
- /***************************************************************************
186
- *
187
- * MatchAllWeight
188
- *
189
- ***************************************************************************/
190
-
191
- extern Weight *maw_create(Query *query, Searcher *searcher);
192
-
193
- /***************************************************************************
194
- *
195
- * SpanWeight
196
- *
197
- ***************************************************************************/
198
-
199
- extern Weight *spanw_create(Query *query, Searcher *searcher);
200
-
201
171
  /***************************************************************************
202
172
  *
203
173
  * Query
204
174
  *
205
175
  ***************************************************************************/
206
176
 
207
- enum QUERY_TYPE {
177
+ enum QUERY_TYPE
178
+ {
208
179
  TERM_QUERY,
180
+ MULTI_TERM_QUERY,
209
181
  BOOLEAN_QUERY,
210
182
  PHRASE_QUERY,
211
- MULTI_PHRASE_QUERY,
212
183
  CONSTANT_QUERY,
213
184
  FILTERED_QUERY,
214
185
  MATCH_ALL_QUERY,
@@ -225,57 +196,53 @@ enum QUERY_TYPE {
225
196
 
226
197
  struct Query
227
198
  {
228
- uchar type;
229
- int ref_cnt;
230
- void *data;
231
- float boost;
232
- Weight *weight;
233
- Query *(*rewrite)(Query *self, IndexReader *ir);
234
- void (*extract_terms)(Query *self, HashSet *terms);
235
- Similarity *(*get_similarity)(Query *self, Searcher *searcher);
236
- char *(*to_s)(Query *self, char *field);
237
- uint (*hash)(Query *self);
238
- int (*eq)(Query *self, Query *o);
239
- void (*destroy_i)(Query *self);
240
- Weight *(*create_weight_i)(Query *self, Searcher *searcher);
241
- bool destroy_all : 1;
199
+ int ref_cnt;
200
+ float boost;
201
+ Weight *weight;
202
+ Query *(*rewrite)(Query *self, IndexReader *ir);
203
+ void (*extract_terms)(Query *self, HashSet *terms);
204
+ Similarity *(*get_similarity)(Query *self, Searcher *searcher);
205
+ char *(*to_s)(Query *self, const char *field);
206
+ ulong (*hash)(Query *self);
207
+ int (*eq)(Query *self, Query *o);
208
+ void (*destroy_i)(Query *self);
209
+ Weight *(*create_weight_i)(Query *self, Searcher *searcher);
210
+ MatchVector *(*get_matchv_i)(Query *self, MatchVector *mv, TermVector *tv);
211
+ enum QUERY_TYPE type;
242
212
  };
243
213
 
244
214
  /* Internal Query Functions */
245
- extern Query *q_create();
246
215
  extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
247
216
  extern void q_destroy_i(Query *self);
248
217
  extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
249
218
 
250
-
251
219
  extern void q_deref(Query *self);
220
+ extern const char *q_get_query_name(enum QUERY_TYPE type);
252
221
  extern Weight *q_weight(Query *self, Searcher *searcher);
253
222
  extern Query *q_combine(Query **queries, int q_cnt);
254
- extern uint q_hash(Query *self);
223
+ extern ulong q_hash(Query *self);
255
224
  extern int q_eq(Query *self, Query *o);
225
+ extern Query *q_create(size_t size);
226
+ #define q_new(type) q_create(sizeof(type))
256
227
 
257
228
  /***************************************************************************
258
- *
259
229
  * TermQuery
260
- *
261
230
  ***************************************************************************/
262
231
 
263
232
  typedef struct TermQuery
264
233
  {
265
- Term *term;
234
+ Query super;
235
+ char *field;
236
+ char *term;
266
237
  } TermQuery;
267
238
 
268
- extern Query *tq_create(Term *term);
239
+ Query *tq_new(const char *field, const char *term);
269
240
 
270
241
  /***************************************************************************
271
- *
272
242
  * BooleanQuery
273
- *
274
243
  ***************************************************************************/
275
244
 
276
- /***************************************************************************
277
- * BooleanClause
278
- ***************************************************************************/
245
+ /* *** BooleanClause *** */
279
246
 
280
247
  enum BC_TYPE
281
248
  {
@@ -284,22 +251,20 @@ enum BC_TYPE
284
251
  BC_MUST_NOT
285
252
  };
286
253
 
287
- typedef struct BooleanClause {
254
+ typedef struct BooleanClause
255
+ {
288
256
  int ref_cnt;
289
257
  Query *query;
290
- Query *rewritten;
291
258
  unsigned int occur : 4;
292
259
  bool is_prohibited : 1;
293
260
  bool is_required : 1;
294
261
  } BooleanClause;
295
262
 
296
- extern BooleanClause *bc_create(Query *query, unsigned int occur);
263
+ extern BooleanClause *bc_new(Query *query, enum BC_TYPE occur);
297
264
  extern void bc_deref(BooleanClause *self);
298
- extern void bc_set_occur(BooleanClause *self, unsigned int occur);
265
+ extern void bc_set_occur(BooleanClause *self, enum BC_TYPE occur);
299
266
 
300
- /***************************************************************************
301
- * BooleanQuery
302
- ***************************************************************************/
267
+ /* *** BooleanQuery *** */
303
268
 
304
269
  #define DEFAULT_MAX_CLAUSE_COUNT 1024
305
270
  #define BOOLEAN_CLAUSES_START_CAPA 4
@@ -307,172 +272,184 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
307
272
 
308
273
  typedef struct BooleanQuery
309
274
  {
310
- bool coord_disabled;
311
- int max_clause_cnt;
312
- int clause_cnt;
313
- int clause_capa;
314
- float original_boost;
275
+ Query super;
276
+ bool coord_disabled;
277
+ int max_clause_cnt;
278
+ int clause_cnt;
279
+ int clause_capa;
280
+ float original_boost;
315
281
  BooleanClause **clauses;
316
- Similarity *similarity;
282
+ Similarity *similarity;
317
283
  } BooleanQuery;
318
284
 
319
- extern Query *bq_create(bool coord_disabled);
285
+ extern Query *bq_new(bool coord_disabled);
320
286
  extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
321
- unsigned int occur);
287
+ enum BC_TYPE occur);
288
+ extern BooleanClause *bq_add_query_nr(Query *self, Query *sub_query,
289
+ enum BC_TYPE occur);
322
290
  extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
291
+ extern BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc);
323
292
 
324
293
  /***************************************************************************
325
- *
326
294
  * PhraseQuery
327
- *
328
295
  ***************************************************************************/
329
296
 
330
297
  #define PHQ_INIT_CAPA 4
331
298
  typedef struct PhraseQuery
332
299
  {
333
- int slop;
334
- Term **terms;
335
- int *positions;
336
- int t_cnt;
337
- int t_capa;
338
- char *field;
300
+ Query super;
301
+ int slop;
302
+ char *field;
303
+ PhrasePosition *positions;
304
+ int pos_cnt;
305
+ int pos_capa;
339
306
  } PhraseQuery;
340
307
 
341
- extern Query *phq_create();
342
- extern void phq_add_term(Query *self, Term *term, int pos_inc);
308
+ extern Query *phq_new(const char *field);
309
+ extern void phq_add_term(Query *self, const char *term, int pos_inc);
310
+ extern void phq_add_term_abs(Query *self, const char *term, int position);
311
+ extern void phq_append_multi_term(Query *self, const char *term);
343
312
 
344
313
  /***************************************************************************
345
- *
346
- * MultiPhraseQuery
347
- *
314
+ * MultiTermQuery
348
315
  ***************************************************************************/
349
316
 
350
- typedef struct MultiPhraseQuery
317
+ #define MULTI_TERM_QUERY_MAX_TERMS 256
318
+ typedef struct MultiTermQuery
351
319
  {
352
- int slop;
353
- Term ***terms;
354
- int *positions;
355
- int *pt_cnt;
356
- int t_cnt;
357
- int t_capa;
358
- char *field;
359
- } MultiPhraseQuery;
320
+ Query super;
321
+ char *field;
322
+ PriorityQueue *boosted_terms;
323
+ float min_boost;
324
+ } MultiTermQuery;
360
325
 
361
- extern Query *mphq_create();
362
- extern void mphq_add_terms(Query *self, Term **ts, int t_cnt, int pos_inc);
326
+ extern void multi_tq_add_term(Query *self, const char *term);
327
+ extern void multi_tq_add_term_boost(Query *self, const char *term, float boost);
328
+ extern Query *multi_tq_new(const char *field);
329
+ extern Query *multi_tq_new_conf(const char *field, int max_terms,
330
+ float min_boost);
331
+
332
+ #define MTQMaxTerms(query) (((MTQSubQuery *)(query))->max_terms)
333
+ typedef struct MTQSubQuery
334
+ {
335
+ Query super;
336
+ int max_terms;
337
+ } MTQSubQuery;
363
338
 
364
339
  /***************************************************************************
365
- *
366
340
  * PrefixQuery
367
- *
368
341
  ***************************************************************************/
369
342
 
370
- extern Query *prefixq_create(Term *prefix);
343
+ #define PREFIX_QUERY_MAX_TERMS 256
344
+
345
+
346
+ typedef struct PrefixQuery
347
+ {
348
+ MTQSubQuery super;
349
+ char *field;
350
+ char *prefix;
351
+ } PrefixQuery;
352
+
353
+ extern Query *prefixq_new(const char *field, const char *prefix);
371
354
 
372
355
  /***************************************************************************
373
- *
374
356
  * WildCardQuery
375
- *
376
357
  ***************************************************************************/
377
358
 
378
359
  #define WILD_CHAR '?'
379
360
  #define WILD_STRING '*'
361
+ #define WILD_CARD_QUERY_MAX_TERMS 256
362
+
363
+ typedef struct WildCardQuery
364
+ {
365
+ MTQSubQuery super;
366
+ char *field;
367
+ char *pattern;
368
+ } WildCardQuery;
369
+
380
370
 
381
- extern Query *wcq_create(Term *term);
382
- extern bool wc_match(char *pattern, char *text);
371
+ extern Query *wcq_new(const char *field, const char *pattern);
372
+ extern bool wc_match(const char *pattern, const char *text);
383
373
 
384
374
  /***************************************************************************
385
- *
386
375
  * FuzzyQuery
387
- *
388
376
  ***************************************************************************/
389
377
 
390
- #define DEF_MIN_SIM 0.5
378
+ #define DEF_MIN_SIM 0.5f
391
379
  #define DEF_PRE_LEN 0
380
+ #define DEF_MAX_TERMS 256
392
381
  #define TYPICAL_LONGEST_WORD 20
393
382
 
394
383
  typedef struct FuzzyQuery
395
384
  {
396
- Term *term;
397
- char *text; /* term text after prefix */
398
- int text_len;
399
- int pre_len;
400
- float min_sim;
401
- float scale_factor;
402
- int max_distances[TYPICAL_LONGEST_WORD];
403
- int *da;
404
- int da_capa;
385
+ MTQSubQuery super;
386
+ char *field;
387
+ char *term;
388
+ const char *text; /* term text after prefix */
389
+ int text_len;
390
+ int pre_len;
391
+ float min_sim;
392
+ float scale_factor;
393
+ int max_distances[TYPICAL_LONGEST_WORD];
394
+ int *da;
405
395
  } FuzzyQuery;
406
396
 
407
- extern Query *fuzq_create(Term *term);
408
- extern Query *fuzq_create_mp(Term *term, float min_sim, int pre_len);
397
+ extern Query *fuzq_new(const char *term, const char *field);
398
+ extern Query *fuzq_new_conf(const char *field, const char *term,
399
+ float min_sim, int pre_len, int max_terms);
409
400
 
410
401
  /***************************************************************************
411
- *
412
402
  * ConstantScoreQuery
413
- *
414
403
  ***************************************************************************/
415
404
 
416
- extern Query *csq_create(Filter *filter);
405
+ typedef struct ConstantScoreQuery
406
+ {
407
+ Query super;
408
+ Filter *filter;
409
+ } ConstantScoreQuery;
410
+
411
+ extern Query *csq_new(Filter *filter);
412
+ extern Query *csq_new_nr(Filter *filter);
417
413
 
418
414
  /***************************************************************************
419
- *
420
- * FilteredQueryQuery
421
- *
415
+ * FilteredQuery
422
416
  ***************************************************************************/
423
417
 
424
418
  typedef struct FilteredQuery
425
419
  {
426
- Query *query;
420
+ Query super;
421
+ Query *query;
427
422
  Filter *filter;
428
423
  } FilteredQuery;
429
424
 
430
- extern Query *fq_create(Query *query, Filter *filter);
425
+ extern Query *fq_new(Query *query, Filter *filter);
431
426
 
432
427
  /***************************************************************************
433
- *
434
428
  * MatchAllQuery
435
- *
436
429
  ***************************************************************************/
437
430
 
438
- extern Query *maq_create();
431
+ extern Query *maq_new();
439
432
 
440
433
  /***************************************************************************
441
- *
442
434
  * RangeQuery
443
- *
444
435
  ***************************************************************************/
445
436
 
446
- typedef struct Range
447
- {
448
- char *field;
449
- char *lower_term;
450
- char *upper_term;
451
- bool include_lower : 1;
452
- bool include_upper : 1;
453
- } Range;
454
-
455
- extern Query *rq_create(const char *field, char *lower_term,
456
- char *upper_term, bool include_lower, bool include_upper);
457
- extern Query *rq_create_less(const char *field, char *upper_term,
458
- bool include_upper);
459
- extern Query *rq_create_more(const char *field, char *lower_term,
460
- bool include_lower);
437
+ extern Query *rq_new(const char *field, const char *lower_term,
438
+ const char *upper_term, bool include_lower,
439
+ bool include_upper);
440
+ extern Query *rq_new_less(const char *field, const char *upper_term,
441
+ bool include_upper);
442
+ extern Query *rq_new_more(const char *field, const char *lower_term,
443
+ bool include_lower);
461
444
 
462
445
  /***************************************************************************
463
- *
464
446
  * SpanQuery
465
- *
466
- ***************************************************************************/
467
-
468
- /***************************************************************************
469
- * SpanEnum
470
447
  ***************************************************************************/
471
448
 
449
+ /* ** SpanEnum ** */
472
450
  typedef struct SpanEnum SpanEnum;
473
451
  struct SpanEnum
474
452
  {
475
- void *data;
476
453
  Query *query;
477
454
  bool (*next)(SpanEnum *self);
478
455
  bool (*skip_to)(SpanEnum *self, int target_doc);
@@ -483,107 +460,26 @@ struct SpanEnum
483
460
  void (*destroy)(SpanEnum *self);
484
461
  };
485
462
 
486
- /***************************************************************************
487
- * SpanTermEnum
488
- ***************************************************************************/
489
-
490
- typedef struct SpanTermEnum SpanTermEnum;
491
- struct SpanTermEnum
492
- {
493
- TermDocEnum *positions;
494
- int position;
495
- int doc;
496
- int count;
497
- int freq;
498
- };
499
-
500
- extern SpanEnum *spante_create(Query *query, IndexReader *ir);
501
-
502
- /***************************************************************************
503
- * SpanFirstEnum
504
- ***************************************************************************/
505
-
506
- extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
507
-
508
- /***************************************************************************
509
- * SpanOrEnum
510
- ***************************************************************************/
511
-
512
- typedef struct SpanOrEnum
513
- {
514
- PriorityQueue *queue;
515
- SpanEnum **span_enums;
516
- int s_cnt;
517
- bool first_time;
518
- } SpanOrEnum;
519
-
520
- extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
521
-
522
- /***************************************************************************
523
- * SpanEnumCell
524
- ***************************************************************************/
525
-
526
- typedef struct SpanEnumCell
527
- {
528
- SpanEnum *parent;
529
- SpanEnum *se;
530
- int index;
531
- int length;
532
- } SpanEnumCell;
533
-
534
- extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
535
-
536
- /***************************************************************************
537
- * SpanNearEnum
538
- ***************************************************************************/
539
-
540
- typedef struct SpanNearEnum
541
- {
542
- SpanEnum **span_enums;
543
- int s_cnt;
544
- int slop;
545
- int current;
546
- bool first_time : 1;
547
- bool in_order : 1;
548
- int doc;
549
- int start;
550
- int end;
551
- } SpanNearEnum;
552
-
553
- extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
554
-
555
- /***************************************************************************
556
- * SpanNotEnum
557
- ***************************************************************************/
558
-
559
- typedef struct SpanNotEnum
463
+ /* ** SpanQuery ** */
464
+ typedef struct SpanQuery
560
465
  {
561
- SpanEnum *inc;
562
- SpanEnum *exc;
563
- bool more_inc : 1;
564
- bool more_exc : 1;
565
- } SpanNotEnum;
566
-
567
- extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
466
+ Query super;
467
+ char *field;
468
+ SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
469
+ HashSet *(*get_terms)(Query *self);
470
+ } SpanQuery;
568
471
 
569
472
  /***************************************************************************
570
- * SpanQuery
473
+ * SpanTermQuery
571
474
  ***************************************************************************/
572
475
 
573
- typedef struct SpanQuery SpanQuery;
574
- struct SpanQuery
476
+ typedef struct SpanTermQuery
575
477
  {
576
- void *data;
577
- char *field;
578
- SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
579
- HashSet *(*get_terms)(Query *self);
580
- };
478
+ SpanQuery super;
479
+ char *term;
480
+ } SpanTermQuery;
481
+ extern Query *spantq_new(const char *field, const char *term);
581
482
 
582
- /***************************************************************************
583
- * SpanTermQuery
584
- ***************************************************************************/
585
-
586
- extern Query *spantq_create(Term *term);
587
483
 
588
484
  /***************************************************************************
589
485
  * SpanFirstQuery
@@ -591,11 +487,13 @@ extern Query *spantq_create(Term *term);
591
487
 
592
488
  typedef struct SpanFirstQuery
593
489
  {
594
- int end;
595
- Query *match;
490
+ SpanQuery super;
491
+ int end;
492
+ Query *match;
596
493
  } SpanFirstQuery;
597
494
 
598
- extern Query *spanfq_create(Query *match, int end);
495
+ extern Query *spanfq_new(Query *match, int end);
496
+ extern Query *spanfq_new_nr(Query *match, int end);
599
497
 
600
498
  /***************************************************************************
601
499
  * SpanOrQuery
@@ -603,11 +501,15 @@ extern Query *spanfq_create(Query *match, int end);
603
501
 
604
502
  typedef struct SpanOrQuery
605
503
  {
606
- Query **clauses;
607
- int c_cnt;
504
+ SpanQuery super;
505
+ Query **clauses;
506
+ int c_cnt;
507
+ int c_capa;
608
508
  } SpanOrQuery;
609
509
 
610
- extern Query *spanoq_create(Query **clauses, int c_cnt);
510
+ extern Query *spanoq_new();
511
+ extern Query *spanoq_add_clause(Query *self, Query *clause);
512
+ extern Query *spanoq_add_clause_nr(Query *self, Query *clause);
611
513
 
612
514
  /***************************************************************************
613
515
  * SpanNearQuery
@@ -615,15 +517,17 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
615
517
 
616
518
  typedef struct SpanNearQuery
617
519
  {
618
- Query **clauses;
619
- int c_cnt;
620
- int slop;
621
- bool in_order;
520
+ SpanQuery super;
521
+ Query **clauses;
522
+ int c_cnt;
523
+ int c_capa;
524
+ int slop;
525
+ bool in_order : 1;
622
526
  } SpanNearQuery;
623
527
 
624
- extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
625
- bool in_order);
626
-
528
+ extern Query *spannq_new(int slop, bool in_order);
529
+ extern Query *spannq_add_clause(Query *self, Query *clause);
530
+ extern Query *spannq_add_clause_nr(Query *self, Query *clause);
627
531
 
628
532
  /***************************************************************************
629
533
  * SpanNotQuery
@@ -631,11 +535,15 @@ extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
631
535
 
632
536
  typedef struct SpanNotQuery
633
537
  {
634
- Query *inc;
635
- Query *exc;
538
+ SpanQuery super;
539
+ Query *inc;
540
+ Query *exc;
636
541
  } SpanNotQuery;
637
542
 
638
- extern Query *spanxq_create(Query *inc, Query *exc);
543
+ extern Query *spanxq_new(Query *inc, Query *exc);
544
+ extern Query *spanxq_new_nr(Query *inc, Query *exc);
545
+
546
+
639
547
 
640
548
  /***************************************************************************
641
549
  *
@@ -643,13 +551,15 @@ extern Query *spanxq_create(Query *inc, Query *exc);
643
551
  *
644
552
  ***************************************************************************/
645
553
 
646
- #define SCORER_NULLIFY(mscorer) mscorer->destroy(mscorer); mscorer = NULL
554
+ #define SCORER_NULLIFY(mscorer) do {\
555
+ (mscorer)->destroy(mscorer);\
556
+ (mscorer) = NULL;\
557
+ } while (0)
647
558
 
648
559
  struct Scorer
649
560
  {
650
- void *data;
651
561
  Similarity *similarity;
652
- int doc;
562
+ int doc;
653
563
  float (*score)(Scorer *self);
654
564
  bool (*next)(Scorer *self);
655
565
  bool (*skip_to)(Scorer *self, int doc_num);
@@ -657,300 +567,80 @@ struct Scorer
657
567
  void (*destroy)(Scorer *self);
658
568
  };
659
569
 
570
+ #define scorer_new(type, similarity) scorer_create(sizeof(type), similarity)
660
571
  /* Internal Scorer Function */
661
572
  extern void scorer_destroy_i(Scorer *self);
662
-
663
- extern Scorer *scorer_create(Similarity *similarity);
573
+ extern Scorer *scorer_create(size_t size, Similarity *similarity);
664
574
  extern bool scorer_less_than(void *p1, void *p2);
665
- extern bool scorer_doc_less_than(void *p1, void *p2);
575
+ extern bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2);
666
576
  extern int scorer_doc_cmp(const void *p1, const void *p2);
667
577
 
668
- /***************************************************************************
669
- *
670
- * TermScorer
671
- *
672
- ***************************************************************************/
673
-
674
- #define SCORE_CACHE_SIZE 32
675
- #define TDE_READ_SIZE 32
676
-
677
- typedef struct TermScorer
678
- {
679
- int docs[TDE_READ_SIZE];
680
- int freqs[TDE_READ_SIZE];
681
- int pointer;
682
- int pointer_max;
683
- float score_cache[SCORE_CACHE_SIZE];
684
- Weight *weight;
685
- TermDocEnum *tde;
686
- uchar *norms;
687
- float weight_value;
688
- } TermScorer;
689
-
690
- extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
691
-
692
- /***************************************************************************
693
- *
694
- * BooleanScorer
695
- *
696
- ***************************************************************************/
697
-
698
- /***************************************************************************
699
- * Coordinator
700
- ***************************************************************************/
701
-
702
- typedef struct Coordinator
703
- {
704
- int max_coord;
705
- float *coord_factors;
706
- Similarity *similarity;
707
- int num_matches;
708
- } Coordinator;
709
-
710
- /***************************************************************************
711
- * DisjunctionSumScorer
712
- ***************************************************************************/
713
-
714
- typedef struct DisjunctionSumScorer
715
- {
716
- float cum_score;
717
- int num_matches;
718
- int min_num_matches;
719
- Scorer **sub_scorers;
720
- int ss_cnt;
721
- PriorityQueue *scorer_queue;
722
- Coordinator *coordinator;
723
- } DisjunctionSumScorer;
724
-
725
- /***************************************************************************
726
- * ConjunctionScorer
727
- ***************************************************************************/
728
-
729
- typedef struct ConjunctionScorer
730
- {
731
- bool first_time : 1;
732
- bool more : 1;
733
- float coord;
734
- int ss_cnt;
735
- int ss_capa;
736
- Scorer **sub_scorers;
737
- int first;
738
- int last;
739
- Coordinator *coordinator;
740
- int last_scored_doc;
741
- } ConjunctionScorer;
742
-
743
- /***************************************************************************
744
- * SingleMatchScorer
745
- ***************************************************************************/
746
-
747
- typedef struct SingleMatchScorer
748
- {
749
- Coordinator *coordinator;
750
- Scorer *scorer;
751
- } SingleMatchScorer;
752
-
753
- /***************************************************************************
754
- * ReqOptSumScorer
755
- ***************************************************************************/
756
-
757
- typedef struct ReqOptSumScorer
758
- {
759
- Scorer *req_scorer;
760
- Scorer *opt_scorer;
761
- bool first_time_opt;
762
- } ReqOptSumScorer;
763
-
764
- /***************************************************************************
765
- * ReqExclScorer
766
- ***************************************************************************/
767
-
768
- typedef struct ReqExclScorer
769
- {
770
- Scorer *req_scorer;
771
- Scorer *excl_scorer;
772
- bool first_time;
773
- } ReqExclScorer;
774
-
775
- /***************************************************************************
776
- * BooleanScorer
777
- ***************************************************************************/
778
-
779
- typedef struct BooleanScorer
780
- {
781
- Scorer **required_scorers;
782
- int rs_cnt;
783
- int rs_capa;
784
- Scorer **optional_scorers;
785
- int os_cnt;
786
- int os_capa;
787
- Scorer **prohibited_scorers;
788
- int ps_cnt;
789
- int ps_capa;
790
- Scorer *counting_sum_scorer;
791
- Coordinator *coordinator;
792
- } BooleanScorer;
793
-
794
- extern Scorer *bsc_create(Similarity *similarity);
795
- extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
796
-
797
- /***************************************************************************
798
- *
799
- * PhraseScorer
800
- *
801
- ***************************************************************************/
802
-
803
- /***************************************************************************
804
- * PhrasePosition
805
- ***************************************************************************/
806
- typedef struct PhrasePosition
807
- {
808
- TermDocEnum *tpe;
809
- int offset;
810
- int count;
811
- int doc;
812
- int position;
813
- } PhrasePosition;
814
-
815
- extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
816
-
817
- /***************************************************************************
818
- * PhraseScorer
819
- ***************************************************************************/
820
-
821
- typedef struct PhraseScorer
822
- {
823
- float freq;
824
- uchar *norms;
825
- float value;
826
- Weight *weight;
827
- bool first_time : 1;
828
- bool more : 1;
829
- int pp_first;
830
- int pp_last;
831
- int pp_cnt;
832
- PhrasePosition **phrase_pos;
833
- float (*phrase_freq)(Scorer *self);
834
- int slop;
835
- } PhraseScorer;
836
-
837
- extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
838
- int *positions, int t_cnt, Similarity *similarity, uchar *norms);
839
-
840
- /***************************************************************************
841
- * ExactPhraseScorer
842
- ***************************************************************************/
843
-
844
- extern Scorer *exact_phrase_scorer_create(Weight *weight,
845
- TermDocEnum **term_pos_enum, int *positions, int t_cnt,
846
- Similarity *similarity, uchar *norms);
847
-
848
- /***************************************************************************
849
- * SloppyPhraseScorer
850
- ***************************************************************************/
851
-
852
- extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
853
- TermDocEnum **term_pos_enum, int *positions, int t_cnt,
854
- Similarity *similarity, int slop, uchar *norms);
855
-
856
- /***************************************************************************
857
- *
858
- * ConstantScoreScorer
859
- *
860
- ***************************************************************************/
861
-
862
- typedef struct ConstantScoreScorer
863
- {
864
- BitVector *bv;
865
- float score;
866
- } ConstantScoreScorer;
867
-
868
- extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
869
-
870
-
871
- /***************************************************************************
872
- *
873
- * MatchAllScorer
874
- *
875
- ***************************************************************************/
876
-
877
- typedef struct MatchAllScorer
878
- {
879
- IndexReader *ir;
880
- int max_doc;
881
- float score;
882
- } MatchAllScorer;
883
-
884
- extern Scorer *masc_create(Weight *weight, IndexReader *ir);
885
-
886
-
887
- /***************************************************************************
888
- *
889
- * SpanScorer
890
- *
891
- ***************************************************************************/
892
-
893
- typedef struct SpanScorer
894
- {
895
- bool first_time : 1;
896
- bool more : 1;
897
- IndexReader *ir;
898
- SpanEnum *spans;
899
- Similarity *sim;
900
- uchar *norms;
901
- Weight *weight;
902
- float value;
903
- float freq;
904
- } SpanScorer;
905
-
906
- extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
907
-
908
578
  /***************************************************************************
909
579
  *
910
580
  * Sort
911
581
  *
912
582
  ***************************************************************************/
913
583
 
914
- enum SORT_TYPE {
584
+ enum SORT_TYPE
585
+ {
915
586
  SORT_TYPE_SCORE,
916
587
  SORT_TYPE_DOC,
588
+ SORT_TYPE_BYTE,
917
589
  SORT_TYPE_INTEGER,
918
590
  SORT_TYPE_FLOAT,
919
591
  SORT_TYPE_STRING,
920
592
  SORT_TYPE_AUTO
921
593
  };
922
594
 
595
+ /***************************************************************************
596
+ * Comparable
597
+ ***************************************************************************/
598
+
599
+ typedef struct Comparable
600
+ {
601
+ int type;
602
+ union {
603
+ int i;
604
+ float f;
605
+ char *s;
606
+ void *p;
607
+ } val;
608
+ bool reverse : 1;
609
+ } Comparable;
610
+
923
611
  /***************************************************************************
924
612
  * SortField
925
613
  ***************************************************************************/
926
614
 
927
615
  typedef struct SortField
928
616
  {
929
- mutex_t mutex;
930
- char *field;
931
- int type;
932
- bool reverse : 1;
933
- void *index;
934
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
935
- void *(*create_index)(int size);
936
- void (*destroy_index)(void *p);
937
- void (*handle_term)(void *index, TermDocEnum *tde, char *text);
617
+ mutex_t mutex;
618
+ char *field;
619
+ enum SORT_TYPE type;
620
+ bool reverse : 1;
621
+ void *index;
622
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
623
+ void (*get_val)(void *index_ptr, Hit *hit1, Comparable *comparable);
624
+ void *(*create_index)(int size);
625
+ void (*destroy_index)(void *p);
626
+ void (*handle_term)(void *index, TermDocEnum *tde, char *text);
938
627
  } SortField;
939
628
 
940
- extern SortField *sort_field_create(char *field, int type, bool reverse);
941
- extern SortField *sort_field_score_create(bool reverse);
942
- extern SortField *sort_field_doc_create(bool reverse);
943
- extern SortField *sort_field_int_create(char *field, bool reverse);
944
- extern SortField *sort_field_float_create(char *field, bool reverse);
945
- extern SortField *sort_field_string_create(char *field, bool reverse);
946
- extern SortField *sort_field_auto_create(char *field, bool reverse);
629
+ extern SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse);
630
+ extern SortField *sort_field_score_new(bool reverse);
631
+ extern SortField *sort_field_doc_new(bool reverse);
632
+ extern SortField *sort_field_int_new(char *field, bool reverse);
633
+ extern SortField *sort_field_byte_new(char *field, bool reverse);
634
+ extern SortField *sort_field_float_new(char *field, bool reverse);
635
+ extern SortField *sort_field_string_new(char *field, bool reverse);
636
+ extern SortField *sort_field_auto_new(char *field, bool reverse);
947
637
  extern void sort_field_destroy(void *p);
948
638
  extern char *sort_field_to_s(SortField *self);
949
639
 
950
- extern SortField SORT_FIELD_SCORE;
951
- extern SortField SORT_FIELD_SCORE_REV;
952
- extern SortField SORT_FIELD_DOC;
953
- extern SortField SORT_FIELD_DOC_REV;
640
+ extern const SortField SORT_FIELD_SCORE;
641
+ extern const SortField SORT_FIELD_SCORE_REV;
642
+ extern const SortField SORT_FIELD_DOC;
643
+ extern const SortField SORT_FIELD_DOC_REV;
954
644
 
955
645
  /***************************************************************************
956
646
  * Sort
@@ -959,12 +649,13 @@ extern SortField SORT_FIELD_DOC_REV;
959
649
  typedef struct Sort
960
650
  {
961
651
  SortField **sort_fields;
962
- int sf_cnt;
963
- int sf_capa;
652
+ int size;
653
+ int capa;
654
+ int start;
964
655
  bool destroy_all : 1;
965
656
  } Sort;
966
657
 
967
- extern Sort *sort_create();
658
+ extern Sort *sort_new();
968
659
  extern void sort_destroy(void *p);
969
660
  extern void sort_add_sort_field(Sort *self, SortField *sf);
970
661
  extern void sort_clear(Sort *self);
@@ -978,7 +669,27 @@ extern Hit *fshq_pq_pop(PriorityQueue *pq);
978
669
  extern void fshq_pq_down(PriorityQueue *pq);
979
670
  extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
980
671
  extern void fshq_pq_destroy(PriorityQueue *pq);
981
- extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
672
+ extern PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir);
673
+ extern Hit *fshq_pq_pop_fd(PriorityQueue *pq);
674
+
675
+ /***************************************************************************
676
+ * FieldDoc
677
+ ***************************************************************************/
678
+
679
+ typedef struct FieldDoc
680
+ {
681
+ Hit hit;
682
+ int size;
683
+ Comparable comparables[];
684
+ } FieldDoc;
685
+
686
+ extern void fd_destroy(FieldDoc *fd);
687
+
688
+ /***************************************************************************
689
+ * FieldDocSortedHitQueue
690
+ ***************************************************************************/
691
+
692
+ extern bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2);
982
693
 
983
694
  /***************************************************************************
984
695
  *
@@ -986,47 +697,91 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
986
697
  *
987
698
  ***************************************************************************/
988
699
 
989
- struct Searcher {
990
- void *data;
991
- IndexReader *ir;
700
+ typedef bool (*filter_ft)(int doc_num, float score, Searcher *self);
701
+
702
+ struct Searcher
703
+ {
992
704
  Similarity *similarity;
993
- bool close_ir : 1;
994
- int (*doc_freq)(Searcher *self, Term *term);
995
- int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
705
+ int (*doc_freq)(Searcher *self, const char *field,
706
+ const char *term);
996
707
  Document *(*get_doc)(Searcher *self, int doc_num);
708
+ LazyDoc *(*get_lazy_doc)(Searcher *self, int doc_num);
997
709
  int (*max_doc)(Searcher *self);
998
710
  Weight *(*create_weight)(Searcher *self, Query *query);
999
711
  TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
1000
- int num_docs, Filter *filter, Sort *sort);
712
+ int num_docs, Filter *filter, Sort *sort,
713
+ filter_ft filter_func,
714
+ bool load_fields);
715
+ TopDocs *(*search_w)(Searcher *self, Weight *weight, int first_doc,
716
+ int num_docs, Filter *filter, Sort *sort,
717
+ filter_ft filter_func,
718
+ bool load_fields);
1001
719
  void (*search_each)(Searcher *self, Query *query, Filter *filter,
1002
- void (*fn)(Searcher *, int, float, void *), void *arg);
720
+ filter_ft filter_func,
721
+ void (*fn)(Searcher *, int, float, void *),
722
+ void *arg);
1003
723
  void (*search_each_w)(Searcher *self, Weight *weight,
1004
- Filter *filter, void (*fn)(Searcher *, int, float, void *),
724
+ Filter *filter,
725
+ filter_ft filter_func,
726
+ void (*fn)(Searcher *, int, float, void *),
1005
727
  void *arg);
1006
728
  Query *(*rewrite)(Searcher *self, Query *original);
1007
729
  Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
1008
730
  Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
731
+ TermVector *(*get_term_vector)(Searcher *self, const int doc_num,
732
+ const char *field);
1009
733
  Similarity *(*get_similarity)(Searcher *self);
1010
734
  void (*close)(Searcher *self);
735
+ void *arg; /* used to pass values to Searcher functions */
1011
736
  };
1012
737
 
1013
- #define sea_doc_freq(s, t) s->doc_freq(s, t)
1014
- #define sea_doc_freqs(s, t, c) s->doc_freqs(s, t, c)
1015
- #define sea_get_doc(s, dn) s->get_doc(s, dn)
1016
- #define sea_max_doc(s) s->max_doc(s)
1017
- #define sea_search(s, q, fd, nd, filt, sort)\
1018
- s->search(s, q, fd, nd, filt, sort)
1019
- #define sea_search_each(s, q, filt, fn, arg)\
1020
- s->search_each(s, q, filt, fn, arg)
1021
- #define sea_search_each_w(s, q, filt, fn, arg)\
1022
- s->search_each_w(s, q, filt, fn, arg)
1023
- #define sea_rewrite(s, q) s->rewrite(s, q)
1024
- #define sea_explain(s, q, dn) s->explain(s, q, dn)
1025
- #define sea_explain_w(s, q, dn) s->explain_w(s, q, dn)
1026
- #define sea_get_similarity(s) s->get_similarity(s)
1027
- #define sea_close(s) s->close(s)
1028
-
1029
- extern Searcher *sea_create(IndexReader *ir);
738
+ #define searcher_doc_freq(s, t) s->doc_freq(s, t)
739
+ #define searcher_get_doc(s, dn) s->get_doc(s, dn)
740
+ #define searcher_get_lazy_doc(s, dn) s->get_lazy_doc(s, dn)
741
+ #define searcher_max_doc(s) s->max_doc(s)
742
+ #define searcher_rewrite(s, q) s->rewrite(s, q)
743
+ #define searcher_explain(s, q, dn) s->explain(s, q, dn)
744
+ #define searcher_explain_w(s, q, dn) s->explain_w(s, q, dn)
745
+ #define searcher_get_similarity(s) s->get_similarity(s)
746
+ #define searcher_close(s) s->close(s)
747
+ #define searcher_search(s, q, fd, nd, filt, sort, ff)\
748
+ s->search(s, q, fd, nd, filt, sort, ff, false)
749
+ #define searcher_search_fd(s, q, fd, nd, filt, sort, ff)\
750
+ s->search(s, q, fd, nd, filt, sort, ff, true)
751
+ #define searcher_search_each(s, q, filt, ff, fn, arg)\
752
+ s->search_each(s, q, filt, ff, fn, arg)
753
+ #define searcher_search_each_w(s, q, filt, ff, fn, arg)\
754
+ s->search_each_w(s, q, filt, ff, fn, arg)
755
+
756
+
757
+ extern MatchVector *searcher_get_match_vector(Searcher *self,
758
+ Query *query,
759
+ const int doc_num,
760
+ const char *field);
761
+ extern char **searcher_highlight(Searcher *self,
762
+ Query *query,
763
+ const int doc_num,
764
+ const char *field,
765
+ const int excerpt_len,
766
+ const int num_excerpts,
767
+ const char *pre_tag,
768
+ const char *post_tag,
769
+ const char *ellipsis);
770
+
771
+ /***************************************************************************
772
+ *
773
+ * IndexSearcher
774
+ *
775
+ ***************************************************************************/
776
+
777
+ typedef struct IndexSearcher {
778
+ Searcher super;
779
+ IndexReader *ir;
780
+ bool close_ir : 1;
781
+ } IndexSearcher;
782
+
783
+ extern Searcher *isea_new(IndexReader *ir);
784
+ extern int isea_doc_freq(Searcher *self, const char *field, const char *term);
1030
785
 
1031
786
  /***************************************************************************
1032
787
  *
@@ -1036,15 +791,15 @@ extern Searcher *sea_create(IndexReader *ir);
1036
791
 
1037
792
  typedef struct MultiSearcher
1038
793
  {
1039
- int s_cnt;
1040
- Searcher **searchers;
1041
- int *starts;
1042
- int max_doc;
1043
- bool close_subs : 1;
794
+ Searcher super;
795
+ int s_cnt;
796
+ Searcher **searchers;
797
+ int *starts;
798
+ int max_doc;
799
+ bool close_subs : 1;
1044
800
  } MultiSearcher;
1045
801
 
1046
- extern Searcher *msea_create(Searcher **searchers, int s_cnt,
1047
- bool close_subs);
802
+ extern Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs);
1048
803
 
1049
804
  /***************************************************************************
1050
805
  *
@@ -1052,93 +807,39 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
1052
807
  *
1053
808
  ***************************************************************************/
1054
809
 
1055
- #define CONC_WORDS 2
810
+ #define QP_CONC_WORDS 2
811
+ #define QP_MAX_CLAUSES 512
1056
812
 
1057
813
  typedef struct QParser
1058
814
  {
1059
815
  mutex_t mutex;
1060
- bool or_default : 1;
1061
- bool wild_lower : 1;
1062
- bool clean_str : 1;
1063
- bool handle_parse_errors : 1;
1064
- bool allow_any_fields : 1;
1065
- bool close_def_fields : 1;
1066
816
  int def_slop;
817
+ int max_clauses;
818
+ int phq_pos_inc;
1067
819
  char *qstr;
1068
820
  char *qstrp;
1069
- char buf[CONC_WORDS][MAX_WORD_SIZE];
821
+ char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
1070
822
  int buf_index;
823
+ HashTable *field_cache;
1071
824
  HashSet *fields;
1072
825
  HashSet *fields_buf;
1073
826
  HashSet *def_fields;
1074
827
  HashSet *all_fields;
1075
828
  Analyzer *analyzer;
829
+ HashTable *ts_cache;
1076
830
  Query *result;
831
+ bool or_default : 1;
832
+ bool wild_lower : 1;
833
+ bool clean_str : 1;
834
+ bool handle_parse_errors : 1;
835
+ bool allow_any_fields : 1;
836
+ bool close_def_fields : 1;
1077
837
  } QParser;
1078
838
 
1079
- extern QParser *qp_create(HashSet *all_fields, HashSet *def_fields,
1080
- Analyzer *analyzer);
839
+ extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
840
+ Analyzer *analyzer);
1081
841
  extern void qp_destroy(QParser *self);
1082
842
  extern Query *qp_parse(QParser *self, char *qstr);
1083
843
  extern char *qp_clean_str(char *str);
1084
844
 
1085
- /***************************************************************************
1086
- *
1087
- * Index
1088
- *
1089
- ***************************************************************************/
1090
-
1091
- typedef struct Index
1092
- {
1093
- mutex_t mutex;
1094
- Store *store;
1095
- Analyzer *analyzer;
1096
- IndexReader *ir;
1097
- IndexWriter *iw;
1098
- Searcher *sea;
1099
- QParser *qp;
1100
- HashSet *key;
1101
- char *id_field;
1102
- char *def_field;
1103
- /* for IndexWriter */
1104
- bool use_compound_file : 1;
1105
- bool auto_flush : 1;
1106
- bool has_writes : 1;
1107
- bool check_latest : 1;
1108
- } Index;
1109
-
1110
- extern Index *index_create(Store *store, Analyzer *analyzer,
1111
- HashSet *def_fields, bool create);
1112
- extern void index_destroy(Index *self);
1113
- extern void index_flush(Index *self);
1114
- extern int index_size(Index *self);
1115
- extern void index_optimize(Index *self);
1116
- extern bool index_has_del(Index *self);
1117
- extern bool index_is_deleted(Index *self, int doc_num);
1118
- extern void index_add_doc(Index *self, Document *doc);
1119
- extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
1120
- extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
1121
- extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
1122
- extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
1123
- int num_docs, Filter *filter, Sort *sort);
1124
- extern Query *index_get_query(Index *self, char *qstr);
1125
- extern Document *index_get_doc(Index *self, int doc_num);
1126
- extern Document *index_get_doc_ts(Index *self, int doc_num);
1127
- extern Document *index_get_doc_id(Index *self, char *id);
1128
- extern Document *index_get_doc_term(Index *self, Term *term);
1129
- extern void index_delete(Index *self, int doc_num);
1130
- extern void index_delete_term(Index *self, Term *term);
1131
- extern void index_delete_id(Index *self, char *id);
1132
- extern void index_delete_query(Index *self, Query *q, Filter *f);
1133
- extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
1134
- extern int index_term_id(Index *self, Term *term);
1135
- extern Explanation *index_explain(Index *self, Query *q, int doc_num);
1136
- extern void index_auto_flush_ir(Index *self);
1137
- extern void index_auto_flush_iw(Index *self);
1138
-
1139
- extern inline void ensure_searcher_open(Index *self);
1140
- extern inline void ensure_reader_open(Index *self);
1141
- extern inline void ensure_writer_open(Index *self);
1142
-
1143
845
  #endif
1144
-