ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_match_all.c CHANGED
@@ -3,132 +3,146 @@
3
3
 
4
4
  /***************************************************************************
5
5
  *
6
- * Weight
6
+ * MatchAllScorer
7
7
  *
8
8
  ***************************************************************************/
9
9
 
10
- char *maw_to_s(Weight *self)
10
+ #define MASc(scorer) ((MatchAllScorer *)(scorer))
11
+
12
+ typedef struct MatchAllScorer
13
+ {
14
+ Scorer super;
15
+ IndexReader *ir;
16
+ int max_doc;
17
+ float score;
18
+ } MatchAllScorer;
19
+
20
+ static float masc_score(Scorer *self)
21
+ {
22
+ return MASc(self)->score;
23
+ }
24
+
25
+ static bool masc_next(Scorer *self)
11
26
  {
12
- return strfmt("MatchAllWeight(%f)", self->value);
27
+ while (self->doc < (MASc(self)->max_doc - 1)) {
28
+ self->doc++;
29
+ if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->doc)) {
30
+ return true;
31
+ }
32
+ }
33
+ return false;
13
34
  }
14
35
 
15
- Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
36
+ static bool masc_skip_to(Scorer *self, int doc_num)
16
37
  {
17
- Explanation *expl;
18
- if (!ir->is_deleted(ir, doc_num)) {
19
- expl = expl_create(self->value, estrdup("MatchAllQuery: product of:"));
20
- expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
21
- expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
22
- } else {
23
- expl = expl_create(self->value,
24
- strfmt("MatchAllQuery: doc %d was deleted", doc_num));
25
- }
26
-
27
- return expl;
38
+ self->doc = doc_num - 1;
39
+ return masc_next(self);
28
40
  }
29
41
 
30
- Weight *maw_create(Query *query, Searcher *searcher)
42
+ static Explanation *masc_explain(Scorer *self, int doc_num)
31
43
  {
32
- Weight *self = w_create(query);
44
+ (void)self;
45
+ (void)doc_num;
46
+ return expl_new(1.0, "MatchAllScorer");
47
+ }
33
48
 
34
- self->scorer = &masc_create;
35
- self->explain = &maw_explain;
36
- self->to_s = &maw_to_s;
37
- self->sum_of_squared_weights = &w_sum_of_squared_weights;
49
+ static Scorer *masc_new(Weight *weight, IndexReader *ir)
50
+ {
51
+ Scorer *self = scorer_new(MatchAllScorer, weight->similarity);
38
52
 
39
- self->similarity = query->get_similarity(query, searcher);
40
- self->idf = 1.0;
53
+ MASc(self)->ir = ir;
54
+ MASc(self)->max_doc = ir->max_doc(ir);
55
+ MASc(self)->score = weight->value;
41
56
 
42
- return self;
57
+ self->doc = -1;
58
+ self->score = &masc_score;
59
+ self->next = &masc_next;
60
+ self->skip_to = &masc_skip_to;
61
+ self->explain = &masc_explain;
62
+ self->destroy = &scorer_destroy_i;
63
+
64
+ return self;
43
65
  }
44
66
 
45
67
  /***************************************************************************
46
68
  *
47
- * MatchAllQuery
69
+ * Weight
48
70
  *
49
71
  ***************************************************************************/
50
72
 
51
- char *maq_to_s(Query *self, char *field)
73
+ static char *maw_to_s(Weight *self)
52
74
  {
53
- if (self->boost == 1.0) {
54
- return estrdup("MatchAll");
55
- } else {
56
- return strfmt("MatchAll^%f", self->boost);
57
- }
75
+ return strfmt("MatchAllWeight(%f)", self->value);
58
76
  }
59
77
 
60
- static uint maq_hash(Query *self)
78
+ static Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
61
79
  {
62
- return 0;
63
- }
80
+ Explanation *expl;
81
+ if (!ir->is_deleted(ir, doc_num)) {
82
+ expl = expl_new(self->value, "MatchAllQuery: product of:");
83
+ expl_add_detail(expl, expl_new(self->query->boost, "boost"));
84
+ expl_add_detail(expl, expl_new(self->qnorm, "query_norm"));
85
+ } else {
86
+ expl = expl_new(self->value,
87
+ "MatchAllQuery: doc %d was deleted", doc_num);
88
+ }
64
89
 
65
- static int maq_eq(Query *self, Query *o)
66
- {
67
- return true;
90
+ return expl;
68
91
  }
69
92
 
70
- Query *maq_create()
93
+ static Weight *maw_new(Query *query, Searcher *searcher)
71
94
  {
72
- Query *self = q_create();
95
+ Weight *self = w_new(Weight, query);
73
96
 
74
- self->type = MATCH_ALL_QUERY;
75
- self->to_s = &maq_to_s;
76
- self->hash = &maq_hash;
77
- self->eq = &maq_eq;
78
- self->destroy_i = &q_destroy_i;
79
- self->create_weight_i = &maw_create;
97
+ self->scorer = &masc_new;
98
+ self->explain = &maw_explain;
99
+ self->to_s = &maw_to_s;
80
100
 
81
- return self;
101
+ self->similarity = query->get_similarity(query, searcher);
102
+ self->idf = 1.0;
103
+
104
+ return self;
82
105
  }
83
106
 
84
107
  /***************************************************************************
85
108
  *
86
- * MatchAllScorer
109
+ * MatchAllQuery
87
110
  *
88
111
  ***************************************************************************/
89
112
 
90
- float masc_score(Scorer *self)
113
+ char *maq_to_s(Query *self, const char *field)
91
114
  {
92
- return ((MatchAllScorer *)self->data)->score;
93
- }
94
-
95
- bool masc_next(Scorer *self)
96
- {
97
- MatchAllScorer *mas = (MatchAllScorer *)self->data;
98
- while (self->doc < (mas->max_doc - 1)) {
99
- self->doc++;
100
- if (!mas->ir->is_deleted(mas->ir, self->doc)) {
101
- return true;
115
+ (void)field;
116
+ if (self->boost == 1.0) {
117
+ return estrdup("MatchAll");
118
+ } else {
119
+ return strfmt("MatchAll^%f", self->boost);
102
120
  }
103
- }
104
- return false;
105
121
  }
106
122
 
107
- bool masc_skip_to(Scorer *self, int doc_num)
123
+ static ulong maq_hash(Query *self)
108
124
  {
109
- self->doc = doc_num - 1;
110
- return masc_next(self);
125
+ (void)self;
126
+ return 0;
111
127
  }
112
128
 
113
- Explanation *masc_explain(Scorer *self, int doc_num)
129
+ static int maq_eq(Query *self, Query *o)
114
130
  {
115
- return expl_create(1.0, estrdup("MatchAllScorer"));
131
+ (void)self; (void)o;
132
+ return true;
116
133
  }
117
134
 
118
- Scorer *masc_create(Weight *weight, IndexReader *ir)
135
+ Query *maq_new()
119
136
  {
120
- Scorer *self = scorer_create(weight->similarity);
121
- MatchAllScorer *mas = ALLOC(MatchAllScorer);
122
- mas->ir = ir;
123
- mas->max_doc = ir->max_doc(ir);
124
- mas->score = weight->value;
125
- self->data = mas;
126
-
127
- self->doc = -1;
128
- self->score = &masc_score;
129
- self->next = &masc_next;
130
- self->skip_to = &masc_skip_to;
131
- self->explain = &masc_explain;
132
- self->destroy = &scorer_destroy_i;
133
- return self;
137
+ Query *self = q_new(Query);
138
+
139
+ self->type = MATCH_ALL_QUERY;
140
+ self->to_s = &maq_to_s;
141
+ self->hash = &maq_hash;
142
+ self->eq = &maq_eq;
143
+ self->destroy_i = &q_destroy_i;
144
+ self->create_weight_i = &maw_new;
145
+
146
+ return self;
134
147
  }
148
+
data/ext/q_multi_term.c ADDED
@@ -0,0 +1,663 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+ #include "priorityqueue.h"
4
+ #include "helper.h"
5
+
6
+ #define MTQ(query) ((MultiTermQuery *)(query))
7
+
8
+ /***************************************************************************
9
+ *
10
+ * MultiTerm
11
+ *
12
+ ***************************************************************************/
13
+
14
+ /***************************************************************************
15
+ * BoostedTerm
16
+ ***************************************************************************/
17
+
18
+ typedef struct BoostedTerm
19
+ {
20
+ char *term;
21
+ float boost;
22
+ } BoostedTerm;
23
+
24
+ static bool boosted_term_less_than(const BoostedTerm *bt1,
25
+ const BoostedTerm *bt2)
26
+ {
27
+ if (bt1->boost == bt2->boost) {
28
+ return (strcmp(bt1->term, bt2->term) < 0);
29
+ }
30
+
31
+ return (bt1->boost < bt2->boost);
32
+ }
33
+
34
+ static void boosted_term_destroy(BoostedTerm *self)
35
+ {
36
+ free(self->term);
37
+ free(self);
38
+ }
39
+
40
+ static BoostedTerm *boosted_term_new(const char *term, float boost)
41
+ {
42
+ BoostedTerm *self = ALLOC(BoostedTerm);
43
+ self->term = estrdup(term);
44
+ self->boost = boost;
45
+ return self;
46
+ }
47
+
48
+ /***************************************************************************
49
+ * TermDocEnumWrapper
50
+ ***************************************************************************/
51
+
52
+ #define TDE_READ_SIZE 16
53
+
54
+ typedef struct TermDocEnumWrapper
55
+ {
56
+ const char *term;
57
+ TermDocEnum *tde;
58
+ float boost;
59
+ int doc;
60
+ int freq;
61
+ int docs[TDE_READ_SIZE];
62
+ int freqs[TDE_READ_SIZE];
63
+ int pointer;
64
+ int pointer_max;
65
+ } TermDocEnumWrapper;
66
+
67
+ static bool tdew_less_than(const TermDocEnumWrapper *tdew1,
68
+ const TermDocEnumWrapper *tdew2)
69
+ {
70
+ return (tdew1->doc < tdew2->doc);
71
+ }
72
+
73
+ static bool tdew_next(TermDocEnumWrapper *self)
74
+ {
75
+ self->pointer++;
76
+ if (self->pointer >= self->pointer_max) {
77
+ /* refill buffer */
78
+ self->pointer_max = self->tde->read(self->tde, self->docs, self->freqs,
79
+ TDE_READ_SIZE);
80
+ if (self->pointer_max != 0) {
81
+ self->pointer = 0;
82
+ }
83
+ else {
84
+ return false;
85
+ }
86
+ }
87
+ self->doc = self->docs[self->pointer];
88
+ self->freq = self->freqs[self->pointer];
89
+ return true;
90
+ }
91
+
92
+ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num)
93
+ {
94
+ TermDocEnum *tde = self->tde;
95
+
96
+ while (++(self->pointer) < self->pointer_max) {
97
+ if (self->docs[self->pointer] >= doc_num) {
98
+ self->doc = self->docs[self->pointer];
99
+ self->freq = self->freqs[self->pointer];
100
+ return true;
101
+ }
102
+ }
103
+
104
+ /* not found in cache, seek underlying stream */
105
+ if (tde->skip_to(tde, doc_num)) {
106
+ self->pointer_max = 1;
107
+ self->pointer = 0;
108
+ self->docs[0] = self->doc = tde->doc_num(tde);
109
+ self->freqs[0] = self->freq = tde->freq(tde);
110
+ return true;
111
+ }
112
+ else {
113
+ return false;
114
+ }
115
+ }
116
+
117
+ static void tdew_destroy(TermDocEnumWrapper *self)
118
+ {
119
+ self->tde->close(self->tde);
120
+ free(self);
121
+ }
122
+
123
+ static TermDocEnumWrapper *tdew_new(const char *term, TermDocEnum *tde,
124
+ float boost)
125
+ {
126
+ TermDocEnumWrapper *self = ALLOC_AND_ZERO(TermDocEnumWrapper);
127
+ self->term = term;
128
+ self->tde = tde;
129
+ self->boost = boost;
130
+ self->doc = -1;
131
+ return self;
132
+ }
133
+
134
+ /***************************************************************************
135
+ * MultiTermScorer
136
+ ***************************************************************************/
137
+
138
+ #define SCORE_CACHE_SIZE 32
139
+ #define MTSc(scorer) ((MultiTermScorer *)(scorer))
140
+
141
+ typedef struct MultiTermScorer
142
+ {
143
+ Scorer super;
144
+ const char *field;
145
+ uchar *norms;
146
+ Weight *weight;
147
+ TermDocEnumWrapper **tdew_a;
148
+ int tdew_cnt;
149
+ PriorityQueue *tdew_pq;
150
+ float weight_value;
151
+ float score_cache[SCORE_CACHE_SIZE];
152
+ float total_score;
153
+ } MultiTermScorer;
154
+
155
+ static float multi_tsc_score(Scorer *self)
156
+ {
157
+ return MTSc(self)->total_score * MTSc(self)->weight_value
158
+ * sim_decode_norm(self->similarity, MTSc(self)->norms[self->doc]);
159
+ }
160
+
161
+ static bool multi_tsc_next(Scorer *self)
162
+ {
163
+ int curr_doc;
164
+ float total_score = 0.0;
165
+ TermDocEnumWrapper *tdew;
166
+ MultiTermScorer *mtsc = MTSc(self);
167
+ PriorityQueue *tdew_pq = mtsc->tdew_pq;
168
+ if (tdew_pq == NULL) {
169
+ TermDocEnumWrapper **tdew_a = mtsc->tdew_a;
170
+ int i;
171
+ tdew_pq = pq_new(mtsc->tdew_cnt, (lt_ft)tdew_less_than, (free_ft)NULL);
172
+ for (i = mtsc->tdew_cnt - 1; i >= 0; i--) {
173
+ if (tdew_next(tdew_a[i])) {
174
+ pq_push(tdew_pq, tdew_a[i]);
175
+ }
176
+ }
177
+ mtsc->tdew_pq = tdew_pq;
178
+ }
179
+
180
+ tdew = (TermDocEnumWrapper *)pq_top(tdew_pq);
181
+ if (tdew == NULL) {
182
+ return false;
183
+ }
184
+
185
+ self->doc = curr_doc = tdew->doc;
186
+ do {
187
+ int freq = tdew->freq;
188
+ if (freq < SCORE_CACHE_SIZE) {
189
+ total_score += mtsc->score_cache[freq] * tdew->boost;
190
+ }
191
+ else {
192
+ total_score += sim_tf(self->similarity, (float)freq) * tdew->boost;
193
+ }
194
+
195
+ if (tdew_next(tdew)) {
196
+ pq_down(tdew_pq);
197
+ }
198
+ else {
199
+ pq_pop(tdew_pq);
200
+ }
201
+
202
+ } while (((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL)
203
+ && tdew->doc == curr_doc);
204
+ mtsc->total_score = total_score;
205
+ return true;
206
+ }
207
+
208
+ static bool multi_tsc_advance_to(Scorer *self, int target_doc_num)
209
+ {
210
+ PriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
211
+ TermDocEnumWrapper *tdew;
212
+ if (tdew_pq == NULL) {
213
+ MultiTermScorer *mtsc = MTSc(self);
214
+ TermDocEnumWrapper **tdew_a = mtsc->tdew_a;
215
+ int i;
216
+ tdew_pq = pq_new(mtsc->tdew_cnt, (lt_ft)tdew_less_than, (free_ft)NULL);
217
+ for (i = mtsc->tdew_cnt - 1; i >= 0; i--) {
218
+ tdew_skip_to(tdew_a[i], target_doc_num);
219
+ pq_push(tdew_pq, tdew_a[i]);
220
+ }
221
+ MTSc(self)->tdew_pq = tdew_pq;
222
+ }
223
+ if (tdew_pq->size == 0) {
224
+ self->doc = -1;
225
+ return false;
226
+ }
227
+ while ((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL
228
+ && (target_doc_num > tdew->doc)) {
229
+ if (tdew_skip_to(tdew, target_doc_num)) {
230
+ pq_down(tdew_pq);
231
+ }
232
+ else {
233
+ pq_pop(tdew_pq);
234
+ }
235
+ }
236
+ return (pq_top(tdew_pq) == NULL) ? false : true;
237
+ }
238
+
239
+ static inline bool multi_tsc_skip_to(Scorer *self, int target_doc_num)
240
+ {
241
+ return multi_tsc_advance_to(self, target_doc_num) && multi_tsc_next(self);
242
+ }
243
+
244
+ static Explanation *multi_tsc_explain(Scorer *self, int doc_num)
245
+ {
246
+ MultiTermScorer *mtsc = MTSc(self);
247
+ TermDocEnumWrapper *tdew;
248
+
249
+ if (multi_tsc_advance_to(self, doc_num) &&
250
+ (tdew = (TermDocEnumWrapper *)pq_top(mtsc->tdew_pq))->doc == doc_num) {
251
+
252
+ PriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
253
+ Explanation *expl = expl_new(0.0, "The sum of:");
254
+ int curr_doc = self->doc = tdew->doc;
255
+ float total_score = 0.0;
256
+
257
+ do {
258
+ int freq = tdew->freq;
259
+ expl_add_detail(expl,
260
+ expl_new(sim_tf(self->similarity, (float)freq) * tdew->boost,
261
+ "tf(term_freq(%s:%s)=%d)^%f",
262
+ mtsc->field, tdew->term, freq, tdew->boost));
263
+
264
+ total_score += sim_tf(self->similarity, (float)freq) * tdew->boost;
265
+
266
+ /* maintain tdew queue, even though it probably won't get used
267
+ * again */
268
+ if (tdew_next(tdew)) {
269
+ pq_down(tdew_pq);
270
+ }
271
+ else {
272
+ pq_pop(tdew_pq);
273
+ }
274
+
275
+ } while (((tdew = (TermDocEnumWrapper *)pq_top(tdew_pq)) != NULL)
276
+ && tdew->doc == curr_doc);
277
+ expl->value = total_score;
278
+ return expl;
279
+ }
280
+ else {
281
+ return expl_new(0.0, "None of the required terms exist in the index");
282
+ }
283
+ }
284
+
285
+ static void multi_tsc_destroy(Scorer *self)
286
+ {
287
+ int i;
288
+ TermDocEnumWrapper **tdew_a = MTSc(self)->tdew_a;
289
+ for (i = MTSc(self)->tdew_cnt - 1; i >= 0; i--) {
290
+ tdew_destroy(tdew_a[i]);
291
+ }
292
+ free(tdew_a);
293
+ pq_destroy(MTSc(self)->tdew_pq);
294
+ scorer_destroy_i(self);
295
+ }
296
+
297
+ static Scorer *multi_tsc_new(Weight *weight, const char *field,
298
+ TermDocEnumWrapper **tdew_a, int tdew_cnt,
299
+ uchar *norms)
300
+ {
301
+ int i;
302
+ Scorer *self = scorer_new(MultiTermScorer, weight->similarity);
303
+
304
+ MTSc(self)->weight = weight;
305
+ MTSc(self)->field = field;
306
+ MTSc(self)->weight_value = weight->value;
307
+ MTSc(self)->tdew_a = tdew_a;
308
+ MTSc(self)->tdew_cnt = tdew_cnt;
309
+ MTSc(self)->norms = norms;
310
+
311
+ for (i = 0; i < SCORE_CACHE_SIZE; i++) {
312
+ MTSc(self)->score_cache[i] = sim_tf(self->similarity, (float)i);
313
+ }
314
+
315
+ self->score = &multi_tsc_score;
316
+ self->next = &multi_tsc_next;
317
+ self->skip_to = &multi_tsc_skip_to;
318
+ self->explain = &multi_tsc_explain;
319
+ self->destroy = &multi_tsc_destroy;
320
+
321
+ return self;
322
+ }
323
+
324
+ /***************************************************************************
325
+ * MultiTermWeight
326
+ ***************************************************************************/
327
+
328
+ static char *multi_tw_to_s(Weight *self)
329
+ {
330
+ return strfmt("MultiTermWeight(%f)", self->value);
331
+ }
332
+
333
+ static Scorer *multi_tw_scorer(Weight *self, IndexReader *ir)
334
+ {
335
+ Scorer *multi_tsc = NULL;
336
+ PriorityQueue *boosted_terms = MTQ(self->query)->boosted_terms;
337
+ const int field_num = fis_get_field_num(ir->fis, MTQ(self->query)->field);
338
+
339
+ if (boosted_terms->size > 0 && field_num >= 0) {
340
+ int i;
341
+ TermDocEnum *tde;
342
+ TermEnum *te = ir->terms(ir, field_num);
343
+ TermDocEnumWrapper **tdew_a = ALLOC_N(TermDocEnumWrapper *,
344
+ boosted_terms->size);
345
+ int tdew_cnt = 0;
346
+ /* Priority queues skip the first element */
347
+ for (i = boosted_terms->size; i > 0; i--) {
348
+ char *term;
349
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
350
+ if ((term = te->skip_to(te, bt->term)) != NULL
351
+ && strcmp(term, bt->term) == 0) {
352
+ tde = ir->term_docs(ir);
353
+ tde->seek_te(tde, te);
354
+ tdew_a[tdew_cnt++] = tdew_new(bt->term, tde, bt->boost);
355
+ }
356
+ }
357
+ te->close(te);
358
+ if (tdew_cnt) {
359
+ multi_tsc = multi_tsc_new(self, MTQ(self->query)->field, tdew_a,
360
+ tdew_cnt, ir->get_norms(ir, field_num));
361
+ }
362
+ else {
363
+ free(tdew_a);
364
+ }
365
+ }
366
+
367
+ return multi_tsc;
368
+ }
369
+
370
+ Explanation *multi_tw_explain(Weight *self, IndexReader *ir, int doc_num)
371
+ {
372
+ Explanation *expl;
373
+ Explanation *idf_expl1;
374
+ Explanation *idf_expl2;
375
+ Explanation *query_expl;
376
+ Explanation *qnorm_expl;
377
+ Explanation *field_expl;
378
+ Explanation *tf_expl;
379
+ Scorer *scorer;
380
+ uchar *field_norms;
381
+ float field_norm;
382
+ Explanation *field_norm_expl;
383
+
384
+ char *query_str;
385
+ MultiTermQuery *mtq = MTQ(self->query);
386
+ const char *field = mtq->field;
387
+ PriorityQueue *bt_pq = mtq->boosted_terms;
388
+ int i;
389
+ int total_doc_freqs = 0;
390
+ char *doc_freqs = NULL;
391
+ size_t len = 0, pos = 0;
392
+ const int field_num = fis_get_field_num(ir->fis, field);
393
+
394
+ if (field_num < 0) {
395
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
396
+ }
397
+
398
+ query_str = self->query->to_s(self->query, "");
399
+
400
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, doc_num);
401
+
402
+ len = 30;
403
+ for (i = bt_pq->size; i > 0; i--) {
404
+ len += strlen(((BoostedTerm *)bt_pq->heap[i])->term) + 30;
405
+ }
406
+ doc_freqs = ALLOC_N(char, len);
407
+ for (i = bt_pq->size; i > 0; i--) {
408
+ char *term = ((BoostedTerm *)bt_pq->heap[i])->term;
409
+ int doc_freq = ir->doc_freq(ir, field_num, term);
410
+ sprintf(doc_freqs + pos, "(%s=%d) + ", term, doc_freq);
411
+ pos += strlen(doc_freqs + pos);
412
+ total_doc_freqs += doc_freq;
413
+ }
414
+ pos -= 2; /* remove " + " from the end */
415
+ sprintf(doc_freqs + pos, "= %d", total_doc_freqs);
416
+
417
+ idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
418
+ idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs);
419
+ free(doc_freqs);
420
+
421
+ /* explain query weight */
422
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
423
+
424
+ if (self->query->boost != 1.0) {
425
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
426
+ }
427
+ expl_add_detail(query_expl, idf_expl1);
428
+
429
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
430
+ expl_add_detail(query_expl, qnorm_expl);
431
+
432
+ query_expl->value = self->query->boost * self->idf * self->qnorm;
433
+
434
+ expl_add_detail(expl, query_expl);
435
+
436
+ /* explain field weight */
437
+ field_expl = expl_new(0.0, "field_weight(%s in %d), product of:",
438
+ query_str, doc_num);
439
+ free(query_str);
440
+
441
+ if ((scorer = self->scorer(self, ir)) != NULL) {
442
+ tf_expl = scorer->explain(scorer, doc_num);
443
+ scorer->destroy(scorer);
444
+ }
445
+ else {
446
+ tf_expl = expl_new(0.0, "no terms were found");
447
+ }
448
+ expl_add_detail(field_expl, tf_expl);
449
+ expl_add_detail(field_expl, idf_expl2);
450
+
451
+ field_norms = ir->get_norms(ir, field_num);
452
+ field_norm = (field_norms != NULL)
453
+ ? sim_decode_norm(self->similarity, field_norms[doc_num])
454
+ : (float)0.0;
455
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
456
+ field, doc_num);
457
+
458
+ expl_add_detail(field_expl, field_norm_expl);
459
+
460
+ field_expl->value = tf_expl->value * self->idf * field_norm;
461
+
462
+ /* combine them */
463
+ if (query_expl->value == 1.0) {
464
+ expl_destroy(expl);
465
+ return field_expl;
466
+ }
467
+ else {
468
+ expl->value = (query_expl->value * field_expl->value);
469
+ expl_add_detail(expl, field_expl);
470
+ return expl;
471
+ }
472
+ }
473
+
474
+ static Weight *multi_tw_new(Query *query, Searcher *searcher)
475
+ {
476
+ int i;
477
+ Weight *self = w_new(Weight, query);
478
+ const char *field = MTQ(query)->field;
479
+ PriorityQueue *bt_pq = MTQ(query)->boosted_terms;
480
+
481
+ self->scorer = &multi_tw_scorer;
482
+ self->explain = &multi_tw_explain;
483
+ self->to_s = &multi_tw_to_s;
484
+
485
+ self->similarity = query->get_similarity(query, searcher);
486
+ self->value = query->boost;
487
+ self->idf = 0.0;
488
+
489
+ for (i = bt_pq->size; i > 0; i--) {
490
+ self->idf += sim_idf_term(self->similarity, field,
491
+ ((BoostedTerm *)bt_pq->heap[i])->term,
492
+ searcher);
493
+ }
494
+
495
+ return self;
496
+ }
497
+
498
+
499
+ /***************************************************************************
500
+ * MultiTermQuery
501
+ ***************************************************************************/
502
+
503
+ static char *multi_tq_to_s(Query *self, const char *curr_field)
504
+ {
505
+ int i;
506
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms, *bt_pq_clone;
507
+ BoostedTerm *bt;
508
+ char *buffer, *bptr;
509
+ char *field = MTQ(self)->field;
510
+ int flen = (int)strlen(field);
511
+ int tlen = 0;
512
+
513
+ /* Priority queues skip the first element */
514
+ for (i = boosted_terms->size; i > 0; i--) {
515
+ tlen += (int)strlen(((BoostedTerm *)boosted_terms->heap[i])->term) + 35;
516
+ }
517
+
518
+ bptr = buffer = ALLOC_N(char, tlen + flen + 35);
519
+
520
+ if (strcmp(curr_field, field) != 0) {
521
+ sprintf(bptr, "%s:", field);
522
+ bptr += flen + 1;
523
+ }
524
+
525
+ *(bptr++) = '<';
526
+ bt_pq_clone = pq_clone(boosted_terms);
527
+ while ((bt = (BoostedTerm *)pq_pop(bt_pq_clone)) != NULL) {
528
+ sprintf(bptr, "%s", bt->term);
529
+ bptr += (int)strlen(bptr);
530
+
531
+ if (bt->boost != 1.0) {
532
+ *bptr = '^';
533
+ dbl_to_s(++bptr, bt->boost);
534
+ bptr += (int)strlen(bptr);
535
+ }
536
+
537
+ *(bptr++) = '|';
538
+ }
539
+ pq_destroy(bt_pq_clone);
540
+
541
+ if (bptr[-1] == '<') {
542
+ bptr++; /* handle zero term case */
543
+ }
544
+ bptr[-1] = '>'; /* delete last '|' char */
545
+ bptr[ 0] = '\0';
546
+
547
+ if (self->boost != 1.0) {
548
+ *bptr = '^';
549
+ dbl_to_s(++bptr, self->boost);
550
+ }
551
+
552
+ return buffer;
553
+ }
554
+
555
+ static void multi_tq_destroy_i(Query *self)
556
+ {
557
+ free(MTQ(self)->field);
558
+ pq_destroy(MTQ(self)->boosted_terms);
559
+ q_destroy_i(self);
560
+ }
561
+
562
+ static ulong multi_tq_hash(Query *self)
563
+ {
564
+ int i;
565
+ ulong hash = str_hash(MTQ(self)->field);
566
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms;
567
+ for (i = boosted_terms->size; i > 0; i--) {
568
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
569
+ hash ^= str_hash(bt->term) ^ float2int(bt->boost);
570
+ }
571
+ return hash;
572
+ }
573
+
574
+ static int multi_tq_eq(Query *self, Query *o)
575
+ {
576
+ int i;
577
+ PriorityQueue *boosted_terms1 = MTQ(self)->boosted_terms;
578
+ PriorityQueue *boosted_terms2 = MTQ(o)->boosted_terms;
579
+
580
+ if (strcmp(MTQ(self)->field, MTQ(o)->field) != 0
581
+ || boosted_terms1->size != boosted_terms2->size) {
582
+ return false;
583
+ }
584
+ for (i = boosted_terms1->size; i > 0; i--) {
585
+ BoostedTerm *bt1 = (BoostedTerm *)boosted_terms1->heap[i];
586
+ BoostedTerm *bt2 = (BoostedTerm *)boosted_terms2->heap[i];
587
+ if ((strcmp(bt1->term, bt2->term) != 0) || (bt1->boost != bt2->boost)) {
588
+ return false;
589
+ }
590
+ }
591
+ return true;
592
+ }
593
+
594
+ static MatchVector *multi_tq_get_matchv_i(Query *self, MatchVector *mv,
595
+ TermVector *tv)
596
+ {
597
+ if (strcmp(tv->field, MTQ(self)->field) == 0) {
598
+ int i;
599
+ PriorityQueue *boosted_terms = MTQ(self)->boosted_terms;
600
+ for (i = boosted_terms->size; i > 0; i--) {
601
+ int j;
602
+ BoostedTerm *bt = (BoostedTerm *)boosted_terms->heap[i];
603
+ TVTerm *tv_term = tv_get_tv_term(tv, bt->term);
604
+ if (tv_term) {
605
+ for (j = 0; j < tv_term->freq; j++) {
606
+ int pos = tv_term->positions[j];
607
+ matchv_add(mv, pos, pos);
608
+ }
609
+ }
610
+ }
611
+ }
612
+ return mv;
613
+ }
614
+
615
+ Query *multi_tq_new_conf(const char *field, int max_terms, float min_boost)
616
+ {
617
+ Query *self;
618
+
619
+ if (max_terms <= 0) {
620
+ RAISE(ARG_ERROR, ":max_terms must be greater than or equal to zero. "
621
+ "%d < 0. ", max_terms);
622
+ }
623
+
624
+ self = q_new(MultiTermQuery);
625
+
626
+ MTQ(self)->field = estrdup(field);
627
+ MTQ(self)->boosted_terms = pq_new(max_terms,
628
+ (lt_ft)&boosted_term_less_than,
629
+ (free_ft)&boosted_term_destroy);
630
+ MTQ(self)->min_boost = min_boost;
631
+
632
+ self->type = MULTI_TERM_QUERY;
633
+ self->to_s = &multi_tq_to_s;
634
+ self->hash = &multi_tq_hash;
635
+ self->eq = &multi_tq_eq;
636
+ self->destroy_i = &multi_tq_destroy_i;
637
+ self->create_weight_i = &multi_tw_new;
638
+ self->get_matchv_i = &multi_tq_get_matchv_i;
639
+
640
+ return self;
641
+ }
642
+
643
+ Query *multi_tq_new(const char *field)
644
+ {
645
+ return multi_tq_new_conf(field, MULTI_TERM_QUERY_MAX_TERMS, 0.0);
646
+ }
647
+
648
+ void multi_tq_add_term_boost(Query *self, const char *term, float boost)
649
+ {
650
+ if (boost > MTQ(self)->min_boost) {
651
+ BoostedTerm *bt = boosted_term_new(term, boost);
652
+ PriorityQueue *bt_pq = MTQ(self)->boosted_terms;
653
+ pq_insert(bt_pq, bt);
654
+ if (pq_full(bt_pq)) {
655
+ MTQ(self)->min_boost = ((BoostedTerm *)pq_top(bt_pq))->boost;
656
+ }
657
+ }
658
+ }
659
+
660
+ void multi_tq_add_term(Query *self, const char *term)
661
+ {
662
+ multi_tq_add_term_boost(self, term, 1.0);
663
+ }