ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_match_all.c CHANGED
@@ -3,132 +3,146 @@
3
3
 
4
4
  /***************************************************************************
5
5
  *
6
- * Weight
6
+ * MatchAllScorer
7
7
  *
8
8
  ***************************************************************************/
9
9
 
10
- char *maw_to_s(Weight *self)
10
+ #define MASc(scorer) ((MatchAllScorer *)(scorer))
11
+
12
+ typedef struct MatchAllScorer
13
+ {
14
+ Scorer super;
15
+ IndexReader *ir;
16
+ int max_doc;
17
+ float score;
18
+ } MatchAllScorer;
19
+
20
+ static float masc_score(Scorer *self)
21
+ {
22
+ return MASc(self)->score;
23
+ }
24
+
25
+ static bool masc_next(Scorer *self)
11
26
  {
12
- return strfmt("MatchAllWeight(%f)", self->value);
27
+ while (self->doc < (MASc(self)->max_doc - 1)) {
28
+ self->doc++;
29
+ if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->doc)) {
30
+ return true;
31
+ }
32
+ }
33
+ return false;
13
34
  }
14
35
 
15
- Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
36
+ static bool masc_skip_to(Scorer *self, int doc_num)
16
37
  {
17
- Explanation *expl;
18
- if (!ir->is_deleted(ir, doc_num)) {
19
- expl = expl_create(self->value, estrdup("MatchAllQuery: product of:"));
20
- expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
21
- expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
22
- } else {
23
- expl = expl_create(self->value,
24
- strfmt("MatchAllQuery: doc %d was deleted", doc_num));
25
- }
26
-
27
- return expl;
38
+ self->doc = doc_num - 1;
39
+ return masc_next(self);
28
40
  }
29
41
 
30
- Weight *maw_create(Query *query, Searcher *searcher)
42
+ static Explanation *masc_explain(Scorer *self, int doc_num)
31
43
  {
32
- Weight *self = w_create(query);
44
+ (void)self;
45
+ (void)doc_num;
46
+ return expl_new(1.0, "MatchAllScorer");
47
+ }
33
48
 
34
- self->scorer = &masc_create;
35
- self->explain = &maw_explain;
36
- self->to_s = &maw_to_s;
37
- self->sum_of_squared_weights = &w_sum_of_squared_weights;
49
+ static Scorer *masc_new(Weight *weight, IndexReader *ir)
50
+ {
51
+ Scorer *self = scorer_new(MatchAllScorer, weight->similarity);
38
52
 
39
- self->similarity = query->get_similarity(query, searcher);
40
- self->idf = 1.0;
53
+ MASc(self)->ir = ir;
54
+ MASc(self)->max_doc = ir->max_doc(ir);
55
+ MASc(self)->score = weight->value;
41
56
 
42
- return self;
57
+ self->doc = -1;
58
+ self->score = &masc_score;
59
+ self->next = &masc_next;
60
+ self->skip_to = &masc_skip_to;
61
+ self->explain = &masc_explain;
62
+ self->destroy = &scorer_destroy_i;
63
+
64
+ return self;
43
65
  }
44
66
 
45
67
  /***************************************************************************
46
68
  *
47
- * MatchAllQuery
69
+ * Weight
48
70
  *
49
71
  ***************************************************************************/
50
72
 
51
- char *maq_to_s(Query *self, char *field)
73
+ static char *maw_to_s(Weight *self)
52
74
  {
53
- if (self->boost == 1.0) {
54
- return estrdup("MatchAll");
55
- } else {
56
- return strfmt("MatchAll^%f", self->boost);
57
- }
75
+ return strfmt("MatchAllWeight(%f)", self->value);
58
76
  }
59
77
 
60
- static uint maq_hash(Query *self)
78
+ static Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
61
79
  {
62
- return 0;
63
- }
80
+ Explanation *expl;
81
+ if (!ir->is_deleted(ir, doc_num)) {
82
+ expl = expl_new(self->value, "MatchAllQuery: product of:");
83
+ expl_add_detail(expl, expl_new(self->query->boost, "boost"));
84
+ expl_add_detail(expl, expl_new(self->qnorm, "query_norm"));
85
+ } else {
86
+ expl = expl_new(self->value,
87
+ "MatchAllQuery: doc %d was deleted", doc_num);
88
+ }
64
89
 
65
- static int maq_eq(Query *self, Query *o)
66
- {
67
- return true;
90
+ return expl;
68
91
  }
69
92
 
70
- Query *maq_create()
93
+ static Weight *maw_new(Query *query, Searcher *searcher)
71
94
  {
72
- Query *self = q_create();
95
+ Weight *self = w_new(Weight, query);
73
96
 
74
- self->type = MATCH_ALL_QUERY;
75
- self->to_s = &maq_to_s;
76
- self->hash = &maq_hash;
77
- self->eq = &maq_eq;
78
- self->destroy_i = &q_destroy_i;
79
- self->create_weight_i = &maw_create;
97
+ self->scorer = &masc_new;
98
+ self->explain = &maw_explain;
99
+ self->to_s = &maw_to_s;
80
100
 
81
- return self;
101
+ self->similarity = query->get_similarity(query, searcher);
102
+ self->idf = 1.0;
103
+
104
+ return self;
82
105
  }
83
106
 
84
107
  /***************************************************************************
85
108
  *
86
- * MatchAllScorer
109
+ * MatchAllQuery
87
110
  *
88
111
  ***************************************************************************/
89
112
 
90
- float masc_score(Scorer *self)
113
+ char *maq_to_s(Query *self, const char *field)
91
114
  {
92
- return ((MatchAllScorer *)self->data)->score;
93
- }
94
-
95
- bool masc_next(Scorer *self)
96
- {
97
- MatchAllScorer *mas = (MatchAllScorer *)self->data;
98
- while (self->doc < (mas->max_doc - 1)) {
99
- self->doc++;
100
- if (!mas->ir->is_deleted(mas->ir, self->doc)) {
101
- return true;
115
+ (void)field;
116
+ if (self->boost == 1.0) {
117
+ return estrdup("MatchAll");
118
+ } else {
119
+ return strfmt("MatchAll^%f", self->boost);
102
120
  }
103
- }
104
- return false;
105
121
  }
106
122
 
107
- bool masc_skip_to(Scorer *self, int doc_num)
123
+ static ulong maq_hash(Query *self)
108
124
  {
109
- self->doc = doc_num - 1;
110
- return masc_next(self);
125
+ (void)self;
126
+ return 0;
111
127
  }
112
128
 
113
- Explanation *masc_explain(Scorer *self, int doc_num)
129
+ static int maq_eq(Query *self, Query *o)
114
130
  {
115
- return expl_create(1.0, estrdup("MatchAllScorer"));
131
+ (void)self; (void)o;
132
+ return true;
116
133
  }
117
134
 
118
- Scorer *masc_create(Weight *weight, IndexReader *ir)
135
+ Query *maq_new()
119
136
  {
120
- Scorer *self = scorer_create(weight->similarity);
121
- MatchAllScorer *mas = ALLOC(MatchAllScorer);
122
- mas->ir = ir;
123
- mas->max_doc = ir->max_doc(ir);
124
- mas->score = weight->value;
125
- self->data = mas;
126
-
127
- self->doc = -1;
128
- self->score = &masc_score;
129
- self->next = &masc_next;
130
- self->skip_to = &masc_skip_to;
131
- self->explain = &masc_explain;
132
- self->destroy = &scorer_destroy_i;
133
- return self;
137
+ Query *self = q_new(Query);
138
+
139
+ self->type = MATCH_ALL_QUERY;
140
+ self->to_s = &maq_to_s;
141
+ self->hash = &maq_hash;
142
+ self->eq = &maq_eq;
143
+ self->destroy_i = &q_destroy_i;
144
+ self->create_weight_i = &maw_new;
145
+
146
+ return self;
134
147
  }
148
+
data/ext/q_multi_term.c ADDED
@@ -0,0 +1,663 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+ #include "priorityqueue.h"
4
+ #include "helper.h"
5
+
6
+ #define MTQ(query) ((MultiTermQuery *)(query))
7
+
8
+ /***************************************************************************
9
+ *
10
+ * MultiTerm
11
+ *
12
+ ***************************************************************************/
13
+
14
+ /***************************************************************************
15
+ * BoostedTerm
16
+ ***************************************************************************/
17
+
18
+ typedef struct BoostedTerm
19
+ {
20
+ char *term;
21
+ float boost;
22
+ } BoostedTerm;
23
+
24
+ static bool boosted_term_less_than(const BoostedTerm *bt1,
25
+ const BoostedTerm *bt2)
26
+ {
27
+ if (bt1->boost == bt2->boost) {
28
+ return (strcmp(bt1->term, bt2->term) < 0);
29
+ }
30
+
31
+ return (bt1->boost < bt2->boost);
32
+ }
33
+
34
+ static void boosted_term_destroy(BoostedTerm *self)
35
+ {
36
+ free(self->term);
37
+ free(self);
38
+ }
39
+
40
+ static BoostedTerm *boosted_term_new(const char *term, float boost)
41
+ {
42
+ BoostedTerm *self = ALLOC(BoostedTerm);
43
+ self->term = estrdup(term);
44
+ self->boost = boost;
45
+ return self;
46
+ }
47
+
48
+ /***************************************************************************
49
+ * TermDocEnumWrapper
50
+ ***************************************************************************/
51
+
52
+ #define TDE_READ_SIZE 16
53
+
54
+ typedef struct TermDocEnumWrapper
55
+ {
56
+ const char *term;
57
+ TermDocEnum *tde;
58
+ float boost;
59
+ int doc;
60
+ int freq;
61
+ int docs[TDE_READ_SIZE];
62
+ int freqs[TDE_READ_SIZE];
63
+ int pointer;
64
+ int pointer_max;
65
+ } TermDocEnumWrapper;
66
+
67
+ static bool tdew_less_than(const TermDocEnumWrapper *tdew1,
68
+ const TermDocEnumWrapper *tdew2)
69
+ {
70
+ return (tdew1->doc < tdew2->doc);
71
+ }
72
+
73
+ static bool tdew_next(TermDocEnumWrapper *self)
74
+ {
75
+ self->pointer++;
76
+ if (self->pointer >= self->pointer_max) {
77
+ /* refill buffer */
78
+ self->pointer_max = self->tde->read(self->tde, self->docs, self->freqs,
79
+ TDE_READ_SIZE);
80
+ if (self->pointer_max != 0) {
81
+ self->pointer = 0;
82
+ }
83
+ else {
84
+ return false;
85
+ }
86
+ }
87
+ self->doc = self->docs[self->pointer];
88
+ self->freq = self->freqs[self->pointer];
89
+ return true;
90
+ }
91
+
92
+ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num)
93
+ {
94
+ TermDocEnum *tde = self->tde;
95
+
96
+ while (++(self->pointer) < self->pointer_max) {
97
+ if (self->docs[self->pointer] >= doc_num) {
98
+ self->doc = self->docs[self->pointer];
99
+ self->freq = self->freqs[self->pointer];
100
+ return true;
101
+ }
102
+ }
103
+
104
+ /* not found in cache, seek underlying stream */
105
+ if (tde->skip_to(tde, doc_num)) {
106
+ self->pointer_max = 1;
107
+ self->pointer = 0;
108
+ self->docs[0] = self->doc = tde->doc_num(tde);
109
+ self->freqs[0] = self->freq = tde->freq(tde);
110
+ return true;
111
+ }
112
+ else {
113
+ return false;
114
+ }
115
+ }
116
+
117
+ static void tdew_destroy(TermDocEnumWrapper *self)
118
+ {
119
+ self->tde->close(self->tde);
120
+ free(self);
121
+ }
122
+
123
+ static TermDocEnumWrapper *tdew_new(const char *term, TermDocEnum *tde,
124
+ float boost)
125
+ {
126
+ TermDocEnumWrapper *self = ALLOC_AND_ZERO(TermDocEnumWrapper);
127
+ self->term = term;
128
+ self->tde = tde;
129
+ self->boost = boost;
130
+ self->doc = -1;
131
+ return self;
132
+ }
133
+
134
+ /***************************************************************************
135
+ * MultiTermScorer
136
+ ***************************************************************************/
137
+
138
+ #define SCORE_CACHE_SIZE 32
139
+ #define MTSc(scorer) ((MultiTermScorer *)(scorer))
140
+
141
+ typedef struct MultiTermScorer
142
+ {
143
+ Scorer super;
144
+ const char *field;
145
+ uchar *norms;
146
+ Weight *weight;
147
+ TermDocEnumWrapper **tdew_a;
148
+ int tdew_cnt;
149
+ PriorityQueue *tdew_pq;
150
+ float weight_value;
151
+ float score_cache[SCORE_CACHE_SIZE];
152
+ float total_score;
153
+ } MultiTermScorer;
154
+
155
+ static float multi_tsc_score(Scorer *self)
156
+ {
157
+ return MTSc(self)->total_score * MTSc(self)->weight_value
158
+ * sim_decode_norm(self->similarity, MTSc(self)->norms[self->doc]);
159
+ }
160
+
161
+ static bool multi_tsc_next(Scorer *self)
162
+ {
163
+ int curr_doc;
164
+ float total_score = 0.0;
165
+ TermDocEnumWrapper *tdew;
166
+ MultiTermScorer *mtsc = MTSc(self);
167
+ PriorityQueue *tdew_pq = mtsc->tdew_pq;
168
+ if (tdew_pq == NULL) {
169
+ TermDocEnumWrapper **tdew_a = mtsc->tdew_a;
170
+ int i;
171
+ tdew_pq = pq_new(mtsc->tdew_cnt, (lt_ft)tdew_less_than, (free_ft)NULL);
172
+ for (i = mtsc->tdew_cnt - 1; i >= 0; i--) {
173
+ if (tdew_next(tdew_a[i])) {
174
+ pq_push(tdew_pq, tdew_a[i]);
175
+ }
176
+ }
177
+ mtsc->tdew_pq = tdew_pq;
178
+ }
179
+
180
+ tdew = (TermDocEnumWrapper *)pq_top(tdew_pq);
181
+ if (tdew == NULL) {
182
+ return false;
183
+ }
184
+
185
+ self->doc = curr_doc = tdew->doc;
186
+ do {
187
+ int freq = tdew->freq;
188
+ if (freq < SCORE_CACHE_SIZE) {
189
+ total_score += mtsc->score_cache[freq] * tdew->boost;
190
+ }
191
+ else {
192
+ total_score += sim_tf(self->similarity, (float)freq) * tdew->boost;
193
+ }
194
+
195
+ if (tdew_next(tdew)) {
196
+ pq_down(tdew_pq);
197
+ }
198
+ else {
199
+ pq_pop(tdew_pq);
200
+ }
201
+
202
+ } while (((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL)
203
+ && tdew->doc == curr_doc);
204
+ mtsc->total_score = total_score;
205
+ return true;
206
+ }
207
+
208
+ static bool multi_tsc_advance_to(Scorer *self, int target_doc_num)
209
+ {
210
+ PriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
211
+ TermDocEnumWrapper *tdew;
212
+ if (tdew_pq == NULL) {
213
+ MultiTermScorer *mtsc = MTSc(self);
214
+ TermDocEnumWrapper **tdew_a = mtsc->tdew_a;
215
+ int i;
216
+ tdew_pq = pq_new(mtsc->tdew_cnt, (lt_ft)tdew_less_than, (free_ft)NULL);
217
+ for (i = mtsc->tdew_cnt - 1; i >= 0; i--) {
218
+ tdew_skip_to(tdew_a[i], target_doc_num);
219
+ pq_push(tdew_pq, tdew_a[i]);
220
+ }
221
+ MTSc(self)->tdew_pq = tdew_pq;
222
+ }
223
+ if (tdew_pq->size == 0) {
224
+ self->doc = -1;
225
+ return false;
226
+ }
227
+ while ((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL
228
+ && (target_doc_num > tdew->doc)) {
229
+ if (tdew_skip_to(tdew, target_doc_num)) {
230
+ pq_down(tdew_pq);
231
+ }
232
+ else {
233
+ pq_pop(tdew_pq);
234
+ }
235
+ }
236
+ return (pq_top(tdew_pq) == NULL) ? false : true;
237
+ }
238
+
239
+ static inline bool multi_tsc_skip_to(Scorer *self, int target_doc_num)
240
+ {
241
+ return multi_tsc_advance_to(self, target_doc_num) && multi_tsc_next(self);
242
+ }
243
+
244
+ static Explanation *multi_tsc_explain(Scorer *self, int doc_num)
245
+ {
246
+ MultiTermScorer *mtsc = MTSc(self);
247
+ TermDocEnumWrapper *tdew;
248
+
249
+ if (multi_tsc_advance_to(self, doc_num) &&
250
+ (tdew = (TermDocEnumWrapper *)pq_top(mtsc->tdew_pq))->doc == doc_num) {
251
+
252
+ PriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
253
+ Explanation *expl = expl_new(0.0, "The sum of:");
254
+ int curr_doc = self->doc = tdew->doc;
255
+ float total_score = 0.0;
256
+
257
+ do {
258
+ int freq = tdew->freq;
259
+ expl_add_detail(expl,
260
+ expl_new(sim_tf(self->similarity, (float)freq) * tdew->boost,
261
+ "tf(term_freq(%s:%s)=%d)^%f",
262
+ mtsc->field, tdew->term, freq, tdew->boost));
263
+
264
+ total_score += sim_tf(self->similarity, (float)freq) * tdew->boost;
265
+
266
+ /* maintain tdew queue, even though it probably won't get used
267
+ * again */
268
+ if (tdew_next(tdew)) {
269
+ pq_down(tdew_pq);
270
+ }
271
+ else {
272
+ pq_pop(tdew_pq);
273
+ }
274
+
275
+ } while (((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL)
276
+ && tdew->doc == curr_doc);
277
+ expl->value = total_score;
278
+ return expl;
279
+ }
280
+ else {
281
+ return expl_new(0.0, "None of the required terms exist in the index");
282
+ }
283
+ }
284
+
285
+ static void multi_tsc_destroy(Scorer *self)
286
+ {
287
+ int i;
288
+ TermDocEnumWrapper **tdew_a = MTSc(self)->tdew_a;
289
+ for (i = MTSc(self)->tdew_cnt - 1; i >= 0; i--) {
290
+ tdew_destroy(tdew_a[i]);
291
+ }
292
+ free(tdew_a);
293
+ pq_destroy(MTSc(self)->tdew_pq);
294
+ scorer_destroy_i(self);
295
+ }
296
+
297
+ static Scorer *multi_tsc_new(Weight *weight, const char *field,
298
+ TermDocEnumWrapper **tdew_a, int tdew_cnt,
299
+ uchar *norms)
300
+ {
301
+ int i;
302
+ Scorer *self = scorer_new(MultiTermScorer, weight->similarity);
303
+
304
+ MTSc(self)->weight = weight;
305
+ MTSc(self)->field = field;
306
+ MTSc(self)->weight_value = weight->value;
307
+ MTSc(self)->tdew_a = tdew_a;
308
+ MTSc(self)->tdew_cnt = tdew_cnt;
309
+ MTSc(self)->norms = norms;
310
+
311
+ for (i = 0; i < SCORE_CACHE_SIZE; i++) {
312
+ MTSc(self)->score_cache[i] = sim_tf(self->similarity, (float)i);
313
+ }
314
+
315
+ self->score = &multi_tsc_score;
316
+ self->next = &multi_tsc_next;
317
+ self->skip_to = &multi_tsc_skip_to;
318
+ self->explain = &multi_tsc_explain;
319
+ self->destroy = &multi_tsc_destroy;
320
+
321
+ return self;
322
+ }
323
+
324
+ /***************************************************************************
325
+ * MultiTermWeight
326
+ ***************************************************************************/
327
+
328
+ static char *multi_tw_to_s(Weight *self)
329
+ {
330
+ return strfmt("MultiTermWeight(%f)", self->value);
331
+ }
332
+
333
+ static Scorer *multi_tw_scorer(Weight *self, IndexReader *ir)
334
+ {
335
+ Scorer *multi_tsc = NULL;
336
+ PriorityQueue *boosted_terms = MTQ(self->query)->boosted_terms;
337
+ const int field_num = fis_get_field_num(ir->fis, MTQ(self->query)->field);
338
+
339
+ if (boosted_terms->size > 0 && field_num >= 0) {
340
+ int i;
341
+ TermDocEnum *tde;
342
+ TermEnum *te = ir->terms(ir, field_num);
343
+ TermDocEnumWrapper **tdew_a = ALLOC_N(TermDocEnumWrapper *,
344
+ boosted_terms->size);
345
+ int tdew_cnt = 0;
346
+ /* Priority queues skip the first element */
347
+ for (i = boosted_terms->size; i > 0; i--) {
348
+ char *term;
349
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
350
+ if ((term = te->skip_to(te, bt->term)) != NULL
351
+ && strcmp(term, bt->term) == 0) {
352
+ tde = ir->term_docs(ir);
353
+ tde->seek_te(tde, te);
354
+ tdew_a[tdew_cnt++] = tdew_new(bt->term, tde, bt->boost);
355
+ }
356
+ }
357
+ te->close(te);
358
+ if (tdew_cnt) {
359
+ multi_tsc = multi_tsc_new(self, MTQ(self->query)->field, tdew_a,
360
+ tdew_cnt, ir->get_norms(ir, field_num));
361
+ }
362
+ else {
363
+ free(tdew_a);
364
+ }
365
+ }
366
+
367
+ return multi_tsc;
368
+ }
369
+
370
+ Explanation *multi_tw_explain(Weight *self, IndexReader *ir, int doc_num)
371
+ {
372
+ Explanation *expl;
373
+ Explanation *idf_expl1;
374
+ Explanation *idf_expl2;
375
+ Explanation *query_expl;
376
+ Explanation *qnorm_expl;
377
+ Explanation *field_expl;
378
+ Explanation *tf_expl;
379
+ Scorer *scorer;
380
+ uchar *field_norms;
381
+ float field_norm;
382
+ Explanation *field_norm_expl;
383
+
384
+ char *query_str;
385
+ MultiTermQuery *mtq = MTQ(self->query);
386
+ const char *field = mtq->field;
387
+ PriorityQueue *bt_pq = mtq->boosted_terms;
388
+ int i;
389
+ int total_doc_freqs = 0;
390
+ char *doc_freqs = NULL;
391
+ size_t len = 0, pos = 0;
392
+ const int field_num = fis_get_field_num(ir->fis, field);
393
+
394
+ if (field_num < 0) {
395
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
396
+ }
397
+
398
+ query_str = self->query->to_s(self->query, "");
399
+
400
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, doc_num);
401
+
402
+ len = 30;
403
+ for (i = bt_pq->size; i > 0; i--) {
404
+ len += strlen(((BoostedTerm *)bt_pq->heap[i])->term) + 30;
405
+ }
406
+ doc_freqs = ALLOC_N(char, len);
407
+ for (i = bt_pq->size; i > 0; i--) {
408
+ char *term = ((BoostedTerm *)bt_pq->heap[i])->term;
409
+ int doc_freq = ir->doc_freq(ir, field_num, term);
410
+ sprintf(doc_freqs + pos, "(%s=%d) + ", term, doc_freq);
411
+ pos += strlen(doc_freqs + pos);
412
+ total_doc_freqs += doc_freq;
413
+ }
414
+ pos -= 2; /* remove " + " from the end */
415
+ sprintf(doc_freqs + pos, "= %d", total_doc_freqs);
416
+
417
+ idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
418
+ idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
419
+ free(doc_freqs);
420
+
421
+ /* explain query weight */
422
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
423
+
424
+ if (self->query->boost != 1.0) {
425
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
426
+ }
427
+ expl_add_detail(query_expl, idf_expl1);
428
+
429
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
430
+ expl_add_detail(query_expl, qnorm_expl);
431
+
432
+ query_expl->value = self->query->boost * self->idf * self->qnorm;
433
+
434
+ expl_add_detail(expl, query_expl);
435
+
436
+ /* explain field weight */
437
+ field_expl = expl_new(0.0, "field_weight(%s in %d), product of:",
438
+ query_str, doc_num);
439
+ free(query_str);
440
+
441
+ if ((scorer = self->scorer(self, ir)) != NULL) {
442
+ tf_expl = scorer->explain(scorer, doc_num);
443
+ scorer->destroy(scorer);
444
+ }
445
+ else {
446
+ tf_expl = expl_new(0.0, "no terms were found");
447
+ }
448
+ expl_add_detail(field_expl, tf_expl);
449
+ expl_add_detail(field_expl, idf_expl2);
450
+
451
+ field_norms = ir->get_norms(ir, field_num);
452
+ field_norm = (field_norms != NULL)
453
+ ? sim_decode_norm(self->similarity, field_norms[doc_num])
454
+ : (float)0.0;
455
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
456
+ field, doc_num);
457
+
458
+ expl_add_detail(field_expl, field_norm_expl);
459
+
460
+ field_expl->value = tf_expl->value * self->idf * field_norm;
461
+
462
+ /* combine them */
463
+ if (query_expl->value == 1.0) {
464
+ expl_destroy(expl);
465
+ return field_expl;
466
+ }
467
+ else {
468
+ expl->value = (query_expl->value * field_expl->value);
469
+ expl_add_detail(expl, field_expl);
470
+ return expl;
471
+ }
472
+ }
473
+
474
+ static Weight *multi_tw_new(Query *query, Searcher *searcher)
475
+ {
476
+ int i;
477
+ Weight *self = w_new(Weight, query);
478
+ const char *field = MTQ(query)->field;
479
+ PriorityQueue *bt_pq = MTQ(query)->boosted_terms;
480
+
481
+ self->scorer = &multi_tw_scorer;
482
+ self->explain = &multi_tw_explain;
483
+ self->to_s = &multi_tw_to_s;
484
+
485
+ self->similarity = query->get_similarity(query, searcher);
486
+ self->value = query->boost;
487
+ self->idf = 0.0;
488
+
489
+ for (i = bt_pq->size; i > 0; i--) {
490
+ self->idf += sim_idf_term(self->similarity, field,
491
+ ((BoostedTerm *)bt_pq->heap[i])->term,
492
+ searcher);
493
+ }
494
+
495
+ return self;
496
+ }
497
+
498
+
499
+ /***************************************************************************
500
+ * MultiTermQuery
501
+ ***************************************************************************/
502
+
503
+ static char *multi_tq_to_s(Query *self, const char *curr_field)
504
+ {
505
+ int i;
506
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms, *bt_pq_clone;
507
+ BoostedTerm *bt;
508
+ char *buffer, *bptr;
509
+ char *field = MTQ(self)->field;
510
+ int flen = (int)strlen(field);
511
+ int tlen = 0;
512
+
513
+ /* Priority queues skip the first element */
514
+ for (i = boosted_terms->size; i > 0; i--) {
515
+ tlen += (int)strlen(((BoostedTerm *)boosted_terms->heap[i])->term) + 35;
516
+ }
517
+
518
+ bptr = buffer = ALLOC_N(char, tlen + flen + 35);
519
+
520
+ if (strcmp(curr_field, field) != 0) {
521
+ sprintf(bptr, "%s:", field);
522
+ bptr += flen + 1;
523
+ }
524
+
525
+ *(bptr++) = '<';
526
+ bt_pq_clone = pq_clone(boosted_terms);
527
+ while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
528
+ sprintf(bptr, "%s", bt->term);
529
+ bptr += (int)strlen(bptr);
530
+
531
+ if (bt->boost != 1.0) {
532
+ *bptr = '^';
533
+ dbl_to_s(++bptr, bt->boost);
534
+ bptr += (int)strlen(bptr);
535
+ }
536
+
537
+ *(bptr++) = '|';
538
+ }
539
+ pq_destroy(bt_pq_clone);
540
+
541
+ if (bptr[-1] == '<') {
542
+ bptr++; /* handle zero term case */
543
+ }
544
+ bptr[-1] = '>'; /* delete last '|' char */
545
+ bptr[ 0] = '\0';
546
+
547
+ if (self->boost != 1.0) {
548
+ *bptr = '^';
549
+ dbl_to_s(++bptr, self->boost);
550
+ }
551
+
552
+ return buffer;
553
+ }
554
+
555
+ static void multi_tq_destroy_i(Query *self)
556
+ {
557
+ free(MTQ(self)->field);
558
+ pq_destroy(MTQ(self)->boosted_terms);
559
+ q_destroy_i(self);
560
+ }
561
+
562
+ static ulong multi_tq_hash(Query *self)
563
+ {
564
+ int i;
565
+ ulong hash = str_hash(MTQ(self)->field);
566
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms;
567
+ for (i = boosted_terms->size; i > 0; i--) {
568
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
569
+ hash ^= str_hash(bt->term) ^ float2int(bt->boost);
570
+ }
571
+ return hash;
572
+ }
573
+
574
+ static int multi_tq_eq(Query *self, Query *o)
575
+ {
576
+ int i;
577
+ PriorityQueue *boosted_terms1 = MTQ(self)->boosted_terms;
578
+ PriorityQueue *boosted_terms2 = MTQ(o)->boosted_terms;
579
+
580
+ if (strcmp(MTQ(self)->field, MTQ(o)->field) != 0
581
+ || boosted_terms1->size != boosted_terms2->size) {
582
+ return false;
583
+ }
584
+ for (i = boosted_terms1->size; i > 0; i--) {
585
+ BoostedTerm *bt1 = (BoostedTerm *)boosted_terms1->heap[i];
586
+ BoostedTerm *bt2 = (BoostedTerm *)boosted_terms2->heap[i];
587
+ if ((strcmp(bt1->term, bt2->term) != 0) || (bt1->boost != bt2->boost)) {
588
+ return false;
589
+ }
590
+ }
591
+ return true;
592
+ }
593
+
594
+ static MatchVector *multi_tq_get_matchv_i(Query *self, MatchVector *mv,
595
+ TermVector *tv)
596
+ {
597
+ if (strcmp(tv->field, MTQ(self)->field) == 0) {
598
+ int i;
599
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms;
600
+ for (i = boosted_terms->size; i > 0; i--) {
601
+ int j;
602
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
603
+ TVTerm *tv_term = tv_get_tv_term(tv, bt->term);
604
+ if (tv_term) {
605
+ for (j = 0; j < tv_term->freq; j++) {
606
+ int pos = tv_term->positions[j];
607
+ matchv_add(mv, pos, pos);
608
+ }
609
+ }
610
+ }
611
+ }
612
+ return mv;
613
+ }
614
+
615
+ Query *multi_tq_new_conf(const char *field, int max_terms, float min_boost)
616
+ {
617
+ Query *self;
618
+
619
+ if (max_terms <= 0) {
620
+ RAISE(ARG_ERROR, ":max_terms must be greater than or equal to zero. "
621
+ "%d < 0. ", max_terms);
622
+ }
623
+
624
+ self = q_new(MultiTermQuery);
625
+
626
+ MTQ(self)->field = estrdup(field);
627
+ MTQ(self)->boosted_terms = pq_new(max_terms,
628
+ (lt_ft)&boosted_term_less_than,
629
+ (free_ft)&boosted_term_destroy);
630
+ MTQ(self)->min_boost = min_boost;
631
+
632
+ self->type = MULTI_TERM_QUERY;
633
+ self->to_s = &multi_tq_to_s;
634
+ self->hash = &multi_tq_hash;
635
+ self->eq = &multi_tq_eq;
636
+ self->destroy_i = &multi_tq_destroy_i;
637
+ self->create_weight_i = &multi_tw_new;
638
+ self->get_matchv_i = &multi_tq_get_matchv_i;
639
+
640
+ return self;
641
+ }
642
+
643
+ Query *multi_tq_new(const char *field)
644
+ {
645
+ return multi_tq_new_conf(field, MULTI_TERM_QUERY_MAX_TERMS, 0.0);
646
+ }
647
+
648
+ void multi_tq_add_term_boost(Query *self, const char *term, float boost)
649
+ {
650
+ if (boost > MTQ(self)->min_boost) {
651
+ BoostedTerm *bt = boosted_term_new(term, boost);
652
+ PriorityQueue *bt_pq = MTQ(self)->boosted_terms;
653
+ pq_insert(bt_pq, bt);
654
+ if (pq_full(bt_pq)) {
655
+ MTQ(self)->min_boost = ((BoostedTerm *)pq_top(bt_pq))->boost;
656
+ }
657
+ }
658
+ }
659
+
660
+ void multi_tq_add_term(Query *self, const char *term)
661
+ {
662
+ multi_tq_add_term_boost(self, term, 1.0);
663
+ }