ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_boolean.c CHANGED
@@ -1,1390 +1,1587 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
+ #include "array.h"
3
4
 
4
- static char * const INVALID_BC_ERROR_MSG = "Invalid value for BooleanClause Type";
5
- static char * const TOO_MANY_CLAUSES_ERROR_MSG = "Too many clauses";
6
- static char * const MIN_NUM_MATCHES_ERROR_MSG = "Minimum nr of matches must be positive";
7
- static char * const TWO_SUB_ERROR_MSG = "There must be at least 2 sub_scorers";
8
- static char * const UNKNOWN_OCCUR_VAL_ERROR_MSG = "Unknown value for occur";
5
+ #define BQ(query) ((BooleanQuery *)(query))
6
+ #define BW(weight) ((BooleanWeight *)(weight))
9
7
 
10
8
  /***************************************************************************
11
9
  *
12
- * BooleanWeight
10
+ * BooleanScorer
13
11
  *
14
12
  ***************************************************************************/
15
13
 
16
- float bw_sum_of_squared_weights(Weight *self)
14
+ /***************************************************************************
15
+ * Coordinator
16
+ ***************************************************************************/
17
+
18
+ typedef struct Coordinator
17
19
  {
18
- BooleanWeight *bw = (BooleanWeight *)self->data;
19
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
20
- Weight *weight;
20
+ int max_coord;
21
+ float *coord_factors;
22
+ Similarity *similarity;
23
+ int num_matches;
24
+ } Coordinator;
21
25
 
22
- float sum = 0.0;
23
- int i;
26
+ static Coordinator *coord_new(Similarity *similarity)
27
+ {
28
+ Coordinator *self = ALLOC_AND_ZERO(Coordinator);
29
+ self->similarity = similarity;
30
+ return self;
31
+ }
24
32
 
25
- for (i = 0; i < bw->w_cnt; i++) {
26
- if (! bq->clauses[i]->is_prohibited) {
27
- weight = bw->weights[i];
28
- sum += weight->sum_of_squared_weights(weight); // sum sub-weights
29
- }
30
- }
33
+ static Coordinator *coord_init(Coordinator *self)
34
+ {
35
+ int i;
36
+ self->coord_factors = ALLOC_N(float, self->max_coord + 1);
31
37
 
32
- sum *= self->value * self->value; // boost each sub-weight
38
+ for (i = 0; i <= self->max_coord; i++) {
39
+ self->coord_factors[i]
40
+ = sim_coord(self->similarity, i, self->max_coord);
41
+ }
33
42
 
34
- return sum;
43
+ return self;
35
44
  }
36
45
 
37
- void bw_normalize(Weight *self, float normalization_factor)
46
+ /***************************************************************************
47
+ * DisjunctionSumScorer
48
+ ***************************************************************************/
49
+
50
+ #define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
51
+
52
+ typedef struct DisjunctionSumScorer
38
53
  {
39
- BooleanWeight *bw = (BooleanWeight *)self->data;
40
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
41
- Weight *weight;
42
- int i;
43
- normalization_factor *= self->value; /* multiply by query boost */
54
+ Scorer super;
55
+ float cum_score;
56
+ int num_matches;
57
+ int min_num_matches;
58
+ Scorer **sub_scorers;
59
+ int ss_cnt;
60
+ PriorityQueue *scorer_queue;
61
+ Coordinator *coordinator;
62
+ } DisjunctionSumScorer;
44
63
 
45
- for (i = 0; i < bw->w_cnt; i++) {
46
- if (! bq->clauses[i]->is_prohibited) {
47
- weight = bw->weights[i];
48
- weight->normalize(weight, normalization_factor); // sum sub-weights
49
- }
50
- }
64
+ static float dssc_score(Scorer *self)
65
+ {
66
+ return DSSc(self)->cum_score;
51
67
  }
52
68
 
53
- Scorer *bw_scorer(Weight *self, IndexReader *ir)
69
+ static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
54
70
  {
55
- Scorer *sub_scorer, *bsc = bsc_create(self->similarity);
56
- BooleanWeight *bw = (BooleanWeight *)self->data;
57
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
58
- BooleanClause *clause;
59
- Weight *weight;
60
- int i;
61
-
62
- for (i = 0; i < bw->w_cnt; i++) {
63
- clause = bq->clauses[i];
64
- weight = bw->weights[i];
65
- sub_scorer = weight->scorer(weight, ir);
66
- if (sub_scorer) {
67
- bsc_add_scorer(bsc, sub_scorer, clause->occur);
68
- } else if (clause->is_required) {
69
- bsc->destroy(bsc);
70
- return NULL;
71
+ int i;
72
+ Scorer *sub_scorer;
73
+ PriorityQueue *pq = dssc->scorer_queue
74
+ = pq_new(dssc->ss_cnt, (lt_ft)&scorer_doc_less_than, NULL);
75
+
76
+ for (i = 0; i < dssc->ss_cnt; i++) {
77
+ sub_scorer = dssc->sub_scorers[i];
78
+ if (sub_scorer->next(sub_scorer)) {
79
+ pq_insert(pq, sub_scorer);
80
+ }
71
81
  }
72
- }
82
+ }
83
+
84
+ static bool dssc_advance_after_current(Scorer *self)
85
+ {
86
+ DisjunctionSumScorer *dssc = DSSc(self);
87
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
88
+
89
+ /* repeat until minimum number of matches is found */
90
+ while (true) {
91
+ Scorer *top = (Scorer *)pq_top(scorer_queue);
92
+ self->doc = top->doc;
93
+ dssc->cum_score = top->score(top);
94
+ dssc->num_matches = 1;
95
+ /* Until all sub-scorers are after self->doc */
96
+ while (true) {
97
+ if (top->next(top)) {
98
+ pq_down(scorer_queue);
99
+ }
100
+ else {
101
+ pq_pop(scorer_queue);
102
+ if (scorer_queue->size
103
+ < (dssc->min_num_matches - dssc->num_matches)) {
104
+ /* Not enough subscorers left for a match on this
105
+ * document, also no more chance of any further match */
106
+ return false;
107
+ }
108
+ if (scorer_queue->size == 0) {
109
+ /* nothing more to advance, check for last match. */
110
+ break;
111
+ }
112
+ }
113
+ top = pq_top(scorer_queue);
114
+ if (top->doc != self->doc) {
115
+ /* All remaining subscorers are after self->doc */
116
+ break;
117
+ }
118
+ else {
119
+ dssc->cum_score += top->score(top);
120
+ dssc->num_matches++;
121
+ }
122
+ }
73
123
 
74
- return bsc;
124
+ if (dssc->num_matches >= dssc->min_num_matches) {
125
+ return true;
126
+ }
127
+ else if (scorer_queue->size < dssc->min_num_matches) {
128
+ return false;
129
+ }
130
+ }
75
131
  }
76
132
 
77
- char *bw_to_s(Weight *self)
133
+ static bool dssc_next(Scorer *self)
78
134
  {
79
- return strfmt("BooleanWeight(%f)", self->value);
135
+ if (DSSc(self)->scorer_queue == NULL) {
136
+ dssc_init_scorer_queue(DSSc(self));
137
+ }
138
+
139
+ if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
140
+ return false;
141
+ }
142
+ else {
143
+ return dssc_advance_after_current(self);
144
+ }
80
145
  }
81
146
 
82
- void bw_destroy(Weight *self)
147
+ static bool dssc_skip_to(Scorer *self, int doc_num)
83
148
  {
84
- int i;
85
- BooleanWeight *bw = (BooleanWeight *)self->data;
149
+ DisjunctionSumScorer *dssc = DSSc(self);
150
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
86
151
 
87
- for (i = 0; i < bw->w_cnt; i++) {
88
- bw->weights[i]->destroy(bw->weights[i]);
89
- }
152
+ if (scorer_queue == NULL) {
153
+ dssc_init_scorer_queue(dssc);
154
+ scorer_queue = dssc->scorer_queue;
155
+ }
90
156
 
91
- free(bw->weights);
92
- free(bw);
93
- w_destroy(self);
157
+ if (scorer_queue->size < dssc->min_num_matches) {
158
+ return false;
159
+ }
160
+ if (doc_num <= self->doc) {
161
+ doc_num = self->doc + 1;
162
+ }
163
+ while (true) {
164
+ Scorer *top = pq_top(scorer_queue);
165
+ if (top->doc >= doc_num) {
166
+ return dssc_advance_after_current(self);
167
+ }
168
+ else if (top->skip_to(top, doc_num)) {
169
+ pq_down(scorer_queue);
170
+ }
171
+ else {
172
+ pq_pop(scorer_queue);
173
+ if (scorer_queue->size < dssc->min_num_matches) {
174
+ return false;
175
+ }
176
+ }
177
+ }
94
178
  }
95
179
 
96
- Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
180
+ static Explanation *dssc_explain(Scorer *self, int doc_num)
97
181
  {
98
- BooleanWeight *bw = (BooleanWeight *)self->data;
99
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
100
- Explanation *sum_expl = expl_create(0.0, estrdup("sum of:"));
101
- BooleanClause *clause;
102
- Weight *weight;
103
- Explanation *explanation;
104
- int coord = 0;
105
- int max_coord = 0;
106
- float coord_factor = 0.0;
107
- float sum = 0.0;
108
- int i;
182
+ int i;
183
+ DisjunctionSumScorer *dssc = DSSc(self);
184
+ Scorer *sub_scorer;
185
+ Explanation *e
186
+ = expl_new(0.0, "At least %d of:", dssc->min_num_matches);
187
+ for (i = 0; i < dssc->ss_cnt; i++) {
188
+ sub_scorer = dssc->sub_scorers[i];
189
+ expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
190
+ }
191
+ return e;
192
+ }
109
193
 
110
- for (i = 0; i < bw->w_cnt; i++) {
111
- weight = bw->weights[i];
112
- clause = bq->clauses[i];
113
- explanation = weight->explain(weight, ir, doc_num);
114
- if (!clause->is_prohibited) max_coord++;
115
- if (explanation->value > 0.0) {
116
- if (!clause->is_prohibited) {
117
- expl_add_detail(sum_expl, explanation);
118
- sum += explanation->value;
119
- coord++;
120
- } else {
121
- expl_destoy(explanation);
122
- expl_destoy(sum_expl);
123
- return expl_create(0.0, estrdup("match prohibited"));
124
- }
125
- } else if (clause->is_required) {
126
- expl_destoy(explanation);
127
- expl_destoy(sum_expl);
128
- return expl_create(0.0, estrdup("match required"));
129
- } else {
130
- expl_destoy(explanation);
194
+ static void dssc_destroy(Scorer *self)
195
+ {
196
+ DisjunctionSumScorer *dssc = DSSc(self);
197
+ int i;
198
+ for (i = 0; i < dssc->ss_cnt; i++) {
199
+ dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
131
200
  }
132
- }
133
- sum_expl->value = sum;
134
-
135
- if (coord == 1) { /* only one clause matched */
136
- explanation = sum_expl; /* eliminate wrapper */
137
- sum_expl->dcnt = 0;
138
- sum_expl = sum_expl->details[0];
139
- expl_destoy(explanation);
140
- }
141
-
142
- coord_factor = sim_coord(self->similarity, coord, max_coord);
143
-
144
- if (coord_factor == 1.0) { /* coord is no-op */
145
- return sum_expl; /* eliminate wrapper */
146
- } else {
147
- explanation = expl_create(sum * coord_factor, estrdup("product of:"));
148
- expl_add_detail(explanation, sum_expl);
149
- expl_add_detail(explanation, expl_create(coord_factor,
150
- strfmt("coord(%d/%d)", coord, max_coord)));
151
- return explanation;
152
- }
153
- }
154
-
155
- Weight *bw_create(Query *query, Searcher *searcher)
156
- {
157
- int i;
158
- Weight *self = w_create(query);
159
- BooleanWeight *bw = ALLOC(BooleanWeight);
160
- BooleanQuery *bq = (BooleanQuery *)query->data;
161
-
162
- bw->w_cnt = bq->clause_cnt;
163
- bw->weights = ALLOC_N(Weight *, bw->w_cnt);
164
- for (i = 0; i < bw->w_cnt; i++) {
165
- bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
166
- }
167
- self->data = bw;
168
-
169
- self->normalize = &bw_normalize;
170
- self->scorer = &bw_scorer;
171
- self->explain = &bw_explain;
172
- self->to_s = &bw_to_s;
173
- self->destroy = &bw_destroy;
174
- self->sum_of_squared_weights = &bw_sum_of_squared_weights;
175
-
176
- self->similarity = query->get_similarity(query, searcher);
177
- self->value = query->boost;
178
-
179
- return self;
201
+ if (dssc->scorer_queue) {
202
+ pq_destroy(dssc->scorer_queue);
203
+ }
204
+ scorer_destroy_i(self);
180
205
  }
181
206
 
182
- /***************************************************************************
183
- *
184
- * BooleanClause
185
- *
186
- ***************************************************************************/
207
+ static Scorer *disjunction_sum_scorer_new(Scorer **sub_scorers, int ss_cnt,
208
+ int min_num_matches)
209
+ {
210
+ Scorer *self = scorer_new(DisjunctionSumScorer, NULL);
211
+ DSSc(self)->ss_cnt = ss_cnt;
212
+
213
+ /* The document number of the current match */
214
+ self->doc = -1;
215
+ DSSc(self)->cum_score = -1.0;
216
+
217
+ /* The number of subscorers that provide the current match. */
218
+ DSSc(self)->num_matches = -1;
219
+ DSSc(self)->coordinator = NULL;
187
220
 
188
- void bc_set_occur(BooleanClause *self, unsigned int occur)
189
- {
190
- self->occur = occur;
191
- switch (occur) {
192
- case BC_SHOULD:
193
- self->is_prohibited = false;
194
- self->is_required = false;
195
- break;
196
- case BC_MUST:
197
- self->is_prohibited = false;
198
- self->is_required = true;
199
- break;
200
- case BC_MUST_NOT:
201
- self->is_prohibited = true;
202
- self->is_required = false;
203
- break;
204
- default:
205
- RAISE(ARG_ERROR, INVALID_BC_ERROR_MSG);
206
- }
221
+ #ifdef DEBUG
222
+ if (min_num_matches <= 0) {
223
+ RAISE(ARG_ERROR, "The min_num_matches value <%d> should not be less "
224
+ "than 0\n", min_num_matches);
225
+ }
226
+ if (ss_cnt <= 1) {
227
+ RAISE(ARG_ERROR, "There should be at least 2 sub_scorers in a "
228
+ "DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
229
+ }
230
+ #endif
231
+
232
+ DSSc(self)->min_num_matches = min_num_matches;
233
+ DSSc(self)->sub_scorers = sub_scorers;
234
+ DSSc(self)->scorer_queue = NULL;
235
+
236
+ self->score = &dssc_score;
237
+ self->next = &dssc_next;
238
+ self->skip_to = &dssc_skip_to;
239
+ self->explain = &dssc_explain;
240
+ self->destroy = &dssc_destroy;
241
+
242
+ return self;
207
243
  }
208
244
 
209
- void bc_deref(BooleanClause *self)
245
+ static float cdssc_score(Scorer *self)
210
246
  {
211
- if (--self->ref_cnt <= 0) {
212
- q_deref(self->query);
213
- free(self);
214
- }
247
+ DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
248
+ return DSSc(self)->cum_score;
215
249
  }
216
250
 
217
- uint bc_hash(BooleanClause *self)
251
+ static Scorer *counting_disjunction_sum_scorer_new(
252
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt,
253
+ int min_num_matches)
218
254
  {
219
- return ((q_hash(self->query) << 2) | self->occur);
255
+ Scorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
256
+ min_num_matches);
257
+ DSSc(self)->coordinator = coordinator;
258
+ self->score = &cdssc_score;
259
+ return self;
220
260
  }
221
261
 
222
- int bc_eq(BooleanClause *self, BooleanClause *o)
262
+ /***************************************************************************
263
+ * ConjunctionScorer
264
+ ***************************************************************************/
265
+
266
+ #define CSc(scorer) ((ConjunctionScorer *)(scorer))
267
+
268
+ typedef struct ConjunctionScorer
223
269
  {
224
- return ((self->occur == o->occur) && q_eq(self->query, o->query));
225
- }
270
+ Scorer super;
271
+ bool first_time : 1;
272
+ bool more : 1;
273
+ float coord;
274
+ Scorer **sub_scorers;
275
+ int ss_cnt;
276
+ int first_idx;
277
+ Coordinator *coordinator;
278
+ int last_scored_doc;
279
+ } ConjunctionScorer;
226
280
 
227
- BooleanClause *bc_create(Query *query, unsigned int occur)
281
+ static void csc_sort_scorers(ConjunctionScorer *csc)
228
282
  {
229
- BooleanClause *self = ALLOC(BooleanClause);
230
- self->ref_cnt = 1;
231
- self->query = query;
232
- bc_set_occur(self, occur);
233
- return self;
283
+ qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
284
+ csc->first_idx = 0;
234
285
  }
235
286
 
236
- /***************************************************************************
237
- *
238
- * BooleanQuery
239
- *
240
- ***************************************************************************/
287
+ static void csc_init(Scorer *self, bool init_scorers)
288
+ {
289
+ ConjunctionScorer *csc = CSc(self);
290
+ const int sub_sc_cnt = csc->ss_cnt;
241
291
 
242
- Query *bq_rewrite(Query *self, IndexReader *ir)
243
- {
244
- BooleanQuery *bq = (BooleanQuery *)self->data;
245
- BooleanClause *clause;
246
- Query *q, *rq;
247
- int i;
248
-
249
- if (bq->clause_cnt == 1) { // optimize 1-clause queries
250
- clause = bq->clauses[0];
251
- if (! clause->is_prohibited) { // just return clause
252
- q = clause->query->rewrite(clause->query, ir); // rewrite first
253
-
254
- if (self->boost != 1.0) {// incorporate boost
255
- /* original_boost is initialized to 0.0. If it has been set to
256
- * something else it means this query has already been boosted before
257
- * so boost from the original value */
258
- if ((q == clause->query) && bq->original_boost) {
259
- /* rewrite was no-op */
260
- q->boost = bq->original_boost * self->boost;
261
- } else {
262
- bq->original_boost = q->boost; /* save original boost */
263
- q->boost *= self->boost;
292
+ /* compute coord factor */
293
+ csc->coord = sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
294
+
295
+ csc->more = (sub_sc_cnt > 0);
296
+
297
+ if (init_scorers) {
298
+ int i;
299
+ /* move each scorer to its first entry */
300
+ for (i = 0; i < sub_sc_cnt; i++) {
301
+ Scorer *sub_scorer = csc->sub_scorers[i];
302
+ if (!csc->more) {
303
+ break;
304
+ }
305
+ csc->more = sub_scorer->next(sub_scorer);
264
306
  }
265
- }
266
-
267
- return q;
268
- }
269
- }
270
-
271
- /* replace each clause's query with its rewritten query */
272
- for (i = 0; i < bq->clause_cnt; i++) {
273
- clause = bq->clauses[i];
274
- rq = clause->query->rewrite(clause->query, ir);
275
- q_deref(clause->query);
276
- clause->query = rq;
277
- }
278
-
279
- self->ref_cnt++;
280
- return self;
281
- }
282
-
283
- void bq_extract_terms(Query *self, HashSet *terms)
284
- {
285
- BooleanQuery *bq = (BooleanQuery *)self->data;
286
- BooleanClause *clause;
287
- int i;
288
- for (i = 0; i < bq->clause_cnt; i++) {
289
- clause = bq->clauses[i];
290
- clause->query->extract_terms(clause->query, terms);
291
- }
292
- }
293
-
294
- char *bq_to_s(Query *self, char *field)
295
- {
296
- int i;
297
- BooleanQuery *bq = (BooleanQuery *)self->data;
298
- BooleanClause *clause;
299
- Query *sub_query;
300
- char *buffer;
301
- char *clause_str;
302
- int bp = 0;
303
- int size = QUERY_STRING_START_SIZE;
304
- int needed;
305
- int clause_len;
306
-
307
- buffer = ALLOC_N(char, size);
308
- if (self->boost != 1.0) {
309
- buffer[0] = '(';
310
- bp++;
311
- }
312
-
313
- for (i = 0; i < bq->clause_cnt; i++) {
314
- clause = bq->clauses[i];
315
- clause_str = clause->query->to_s(clause->query, field);
316
- clause_len = (int)strlen(clause_str);
317
- needed = clause_len + 5;
318
- while ((size - bp) < needed) {
319
- size *= 2;
320
- REALLOC_N(buffer, char, size);
321
- }
322
-
323
- if (i > 0) {
324
- buffer[bp++] = ' ';
325
- }
326
- if (clause->is_prohibited) {
327
- buffer[bp++] = '-';
328
- } else if (clause->is_required) {
329
- buffer[bp++] = '+';
330
- }
331
-
332
- sub_query = clause->query;
333
- if (sub_query->type == BOOLEAN_QUERY) { // wrap sub-bools in parens
334
- buffer[bp++] = '(';
335
- memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
336
- bp += clause_len;
337
- buffer[bp++] = ')';
338
- } else {
339
- memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
340
- bp += clause_len;
341
- }
342
- free(clause_str);
343
- }
344
-
345
- if (self->boost != 1.0) {
346
- char *boost_str = strfmt(")^%f", self->boost);
347
- int boost_len = (int)strlen(boost_str);
348
- REALLOC_N(buffer, char, bp + boost_len + 1);
349
- memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
350
- bp += boost_len;
351
- free(boost_str);
352
- }
353
- buffer[bp] = 0;
354
- return buffer;
307
+ if (csc->more) {
308
+ csc_sort_scorers(csc);
309
+ }
310
+ }
311
+
312
+ csc->first_time = false;
355
313
  }
356
314
 
357
- static void bq_destroy(Query *self)
315
+ static float csc_score(Scorer *self)
358
316
  {
359
- BooleanQuery *bq = (BooleanQuery *)self->data;
360
- int i;
361
- for (i = 0; i < bq->clause_cnt; i++) {
362
- bc_deref(bq->clauses[i]);
363
- }
364
- free(bq->clauses);
365
- if (bq->similarity) {
366
- bq->similarity->destroy(bq->similarity);
367
- }
368
- free(bq);
369
- q_destroy_i(self);
317
+ ConjunctionScorer *csc = CSc(self);
318
+ const int sub_sc_cnt = csc->ss_cnt;
319
+ float score = 0.0; /* sum scores */
320
+ int i;
321
+ for (i = 0; i < sub_sc_cnt; i++) {
322
+ Scorer *sub_scorer = csc->sub_scorers[i];
323
+ score += sub_scorer->score(sub_scorer);
324
+ }
325
+ score *= csc->coord;
326
+ return score;
327
+ }
328
+
329
+ static bool csc_do_next(Scorer *self)
330
+ {
331
+ ConjunctionScorer *csc = CSc(self);
332
+ const int sub_sc_cnt = csc->ss_cnt;
333
+ int first_idx = csc->first_idx;
334
+ Scorer *first_sc = csc->sub_scorers[first_idx];
335
+ Scorer *last_sc = csc->sub_scorers[PREV_NUM(first_idx, sub_sc_cnt)];
336
+
337
+ /* skip to doc with all clauses */
338
+ while (csc->more && (first_sc->doc < last_sc->doc)) {
339
+ /* skip first upto last */
340
+ csc->more = first_sc->skip_to(first_sc, last_sc->doc);
341
+ /* move first to last */
342
+ last_sc = first_sc;
343
+ first_idx = NEXT_NUM(first_idx, sub_sc_cnt);
344
+ first_sc = csc->sub_scorers[first_idx];
345
+ }
346
+ self->doc = first_sc->doc;
347
+ csc->first_idx = first_idx;
348
+ return csc->more;
370
349
  }
371
350
 
372
- float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
351
+ static bool csc_next(Scorer *self)
373
352
  {
374
- return 1.0;
353
+ ConjunctionScorer *csc = CSc(self);
354
+ if (csc->first_time) {
355
+ csc_init(self, true);
356
+ }
357
+ else if (csc->more) {
358
+ /* trigger further scanning */
359
+ const int last_idx = PREV_NUM(csc->first_idx, csc->ss_cnt);
360
+ Scorer *sub_scorer = csc->sub_scorers[last_idx];
361
+ csc->more = sub_scorer->next(sub_scorer);
362
+ }
363
+ return csc_do_next(self);
375
364
  }
376
365
 
377
- Similarity *bq_get_similarity(Query *self, Searcher *searcher)
366
+ static bool csc_skip_to(Scorer *self, int doc_num)
378
367
  {
379
- BooleanQuery *bq = (BooleanQuery *)self->data;
380
- if (!bq->similarity) {
381
- Similarity *sim = q_get_similarity_i(self, searcher);
382
- bq->similarity = ALLOC(Similarity);
383
- memcpy(bq->similarity, sim, sizeof(Similarity));
384
- bq->similarity->coord = &bq_coord_disabled;
385
- bq->similarity->destroy = (void (*)(Similarity *))&free;
386
- }
368
+ ConjunctionScorer *csc = CSc(self);
369
+ const int sub_sc_cnt = csc->ss_cnt;
370
+ int i;
371
+ bool more = csc->more;
387
372
 
388
- return bq->similarity;
389
- }
373
+ if (csc->first_time) {
374
+ csc_init(self, true);
375
+ }
390
376
 
391
- static uint bq_hash(Query *self)
392
- {
393
- int i;
394
- uint hash = 0;
395
- BooleanQuery *bq = (BooleanQuery *)self->data;
396
- for (i = 0; i < bq->clause_cnt; i++) {
397
- hash ^= bc_hash(bq->clauses[i]);
398
- }
399
- return (hash << 1) | bq->coord_disabled;
377
+ for (i = 0; i < sub_sc_cnt; i++) {
378
+ if (!more) {
379
+ break;
380
+ }
381
+ else {
382
+ Scorer *sub_scorer = csc->sub_scorers[i];
383
+ more = sub_scorer->skip_to(sub_scorer, doc_num);
384
+ }
385
+ }
386
+ if (more) {
387
+ /* resort the scorers */
388
+ csc_sort_scorers(csc);
389
+ }
390
+
391
+ more = csc->more;
392
+ return csc_do_next(self);
400
393
  }
401
394
 
402
- static int bq_eq(Query *self, Query *o)
395
+ static void csc_destroy(Scorer *self)
403
396
  {
404
- int i;
405
- BooleanQuery *bq1 = (BooleanQuery *)self->data;
406
- BooleanQuery *bq2 = (BooleanQuery *)o->data;
407
- if ((bq1->coord_disabled != bq2->coord_disabled) ||
408
- (bq1->max_clause_cnt != bq1->max_clause_cnt) ||
409
- (bq1->clause_cnt != bq2->clause_cnt)) {
410
- return false;
411
- }
412
-
413
- for (i = 0; i < bq1->clause_cnt; i++) {
414
- if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
415
- return false;
416
- }
417
- }
418
- return true;
419
- }
420
-
421
- Query *bq_create(bool coord_disabled)
422
- {
423
- Query *self = q_create();
424
- BooleanQuery *bq = ALLOC(BooleanQuery);
425
- bq->coord_disabled = coord_disabled;
426
- if (coord_disabled) {
427
- self->get_similarity = &bq_get_similarity;
428
- }
429
- bq->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
430
- bq->clause_cnt = 0;
431
- bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
432
- bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
433
- bq->similarity = NULL;
434
- bq->original_boost = 0.0;
435
- self->data = bq;
436
-
437
- self->type = BOOLEAN_QUERY;
438
- self->rewrite = &bq_rewrite;
439
- self->extract_terms = &bq_extract_terms;
440
- self->to_s = &bq_to_s;
441
- self->hash = &bq_hash;
442
- self->eq = &bq_eq;
443
- self->destroy_i = &bq_destroy;
444
- self->create_weight_i = &bw_create;
445
- return self;
397
+ ConjunctionScorer *csc = CSc(self);
398
+ const int sub_sc_cnt = csc->ss_cnt;
399
+ int i;
400
+ for (i = 0; i < sub_sc_cnt; i++) {
401
+ csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
402
+ }
403
+ free(csc->sub_scorers);
404
+ scorer_destroy_i(self);
446
405
  }
447
406
 
448
- BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
407
+ static Scorer *conjunction_scorer_new(Similarity *similarity)
449
408
  {
450
- BooleanQuery *bq = (BooleanQuery *)self->data;
451
- if (!self->destroy_all) ref(bc);
452
- if (bq->clause_cnt >= bq->clause_capa) {
453
- bq->clause_capa *= 2;
454
- REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
455
- }
456
- if (bq->clause_cnt > bq->max_clause_cnt) {
457
- RAISE(STATE_ERROR, TOO_MANY_CLAUSES_ERROR_MSG);
458
- }
459
- bq->clauses[bq->clause_cnt] = bc;
460
- bq->clause_cnt++;
461
- return bc;
462
- }
463
-
464
- BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
465
- {
466
- BooleanClause *bc = bc_create(sub_query, occur);
467
- bq_add_clause(self, bc);
468
- if (!self->destroy_all) {
469
- ref(sub_query);
470
- bc_deref(bc); /* bc would have been referenced unnecessarily */
471
- }
472
- return bc;
473
- }
409
+ Scorer *self = scorer_new(ConjunctionScorer, similarity);
474
410
 
475
- /***************************************************************************
476
- *
477
- * BooleanScorer
478
- *
479
- ***************************************************************************/
411
+ CSc(self)->first_time = true;
412
+ CSc(self)->more = true;
413
+ CSc(self)->coordinator = NULL;
480
414
 
481
- /***************************************************************************
482
- * Coordinator
483
- ***************************************************************************/
415
+ self->score = &csc_score;
416
+ self->next = &csc_next;
417
+ self->skip_to = &csc_skip_to;
418
+ self->destroy = &csc_destroy;
484
419
 
485
- Coordinator *coord_create(Similarity *similarity)
420
+ return self;
421
+ }
422
+
423
+ static float ccsc_score(Scorer *self)
486
424
  {
487
- Coordinator *self = ALLOC(Coordinator);
488
- ZEROSET(self, Coordinator, 1);
489
- self->similarity = similarity;
490
- return self;
425
+ ConjunctionScorer *csc = CSc(self);
426
+
427
+ int doc;
428
+ if ((doc = self->doc) > csc->last_scored_doc) {
429
+ csc->last_scored_doc = doc;
430
+ csc->coordinator->num_matches += csc->ss_cnt;
431
+ }
432
+
433
+ return csc_score(self);
491
434
  }
492
435
 
493
- Coordinator *coord_init(Coordinator *self)
436
+ static Scorer *counting_conjunction_sum_scorer_new(
437
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt)
494
438
  {
495
- int i;
496
- self->coord_factors = ALLOC_N(float, self->max_coord + 1);
439
+ Scorer *self = conjunction_scorer_new(sim_create_default());
440
+ ConjunctionScorer *csc = CSc(self);
441
+ csc->coordinator = coordinator;
442
+ csc->last_scored_doc = -1;
443
+ csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
444
+ memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
445
+ csc->ss_cnt = ss_cnt;
497
446
 
498
- for (i = 0; i <= self->max_coord; i++) {
499
- self->coord_factors[i] = sim_coord(self->similarity, i, self->max_coord);
500
- }
447
+ self->score = &ccsc_score;
501
448
 
502
- return self;
449
+ return self;
503
450
  }
504
451
 
505
452
  /***************************************************************************
506
- * DisjunctionSumScorer
453
+ * SingleMatchScorer
507
454
  ***************************************************************************/
508
455
 
509
- float dssc_score(Scorer *self)
456
+ #define SMSc(scorer) ((SingleMatchScorer *)(scorer))
457
+
458
+ typedef struct SingleMatchScorer
510
459
  {
511
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
512
- return dssc->cum_score;
513
- }
460
+ Scorer super;
461
+ Coordinator *coordinator;
462
+ Scorer *scorer;
463
+ } SingleMatchScorer;
464
+
514
465
 
515
- void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
466
+ static float smsc_score(Scorer *self)
516
467
  {
517
- int i;
518
- Scorer *sub_scorer;
519
- PriorityQueue *pq = dssc->scorer_queue =
520
- pq_create(dssc->ss_cnt, &scorer_doc_less_than);
468
+ SMSc(self)->coordinator->num_matches++;
469
+ return SMSc(self)->scorer->score(SMSc(self)->scorer);
470
+ }
521
471
 
522
- for (i = 0; i < dssc->ss_cnt; i++) {
523
- sub_scorer = dssc->sub_scorers[i];
524
- if (sub_scorer->next(sub_scorer)) {
525
- pq_insert(pq, sub_scorer);
472
+ static bool smsc_next(Scorer *self)
473
+ {
474
+ Scorer *scorer = SMSc(self)->scorer;
475
+ if (scorer->next(scorer)) {
476
+ self->doc = scorer->doc;
477
+ return true;
526
478
  }
527
- }
479
+ return false;
528
480
  }
529
481
 
530
- bool dssc_advance_after_current(Scorer *self)
482
+ static bool smsc_skip_to(Scorer *self, int doc_num)
531
483
  {
532
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
533
- PriorityQueue *scorer_queue = dssc->scorer_queue;
534
- Scorer *top;
535
- while (true) { // repeat until minimum nr of matches
536
- top = (Scorer *)pq_top(scorer_queue);
537
- self->doc = top->doc;
538
- dssc->cum_score = top->score(top);
539
- dssc->num_matches = 1;
540
- while (true) { // Until all subscorers are after self->hit.doc
541
- if (top->next(top)) {
542
- pq_down(scorer_queue);
543
- } else {
544
- pq_pop(scorer_queue);
545
- if (scorer_queue->count < (dssc->min_num_matches - dssc->num_matches)) {
546
- // Not enough subscorers left for a match on this document,
547
- // and also no more chance of any further match.
548
- return false;
549
- }
550
- if (scorer_queue->count == 0) {
551
- break; // nothing more to advance, check for last match.
552
- }
553
- }
554
- top = pq_top(scorer_queue);
555
- if (top->doc != self->doc) {
556
- break; // All remaining subscorers are after self->hit.doc.
557
- } else {
558
- dssc->cum_score += top->score(top);
559
- dssc->num_matches++;
560
- }
484
+ Scorer *scorer = SMSc(self)->scorer;
485
+ if (scorer->skip_to(scorer, doc_num)) {
486
+ self->doc = scorer->doc;
487
+ return true;
561
488
  }
489
+ return false;
490
+ }
562
491
 
563
- if (dssc->num_matches >= dssc->min_num_matches) {
564
- return true;
565
- } else if (scorer_queue->count < dssc->min_num_matches) {
566
- return false;
567
- }
568
- }
492
+ static Explanation *smsc_explain(Scorer *self, int doc_num)
493
+ {
494
+ Scorer *scorer = SMSc(self)->scorer;
495
+ return scorer->explain(scorer, doc_num);
569
496
  }
570
497
 
571
- bool dssc_next(Scorer *self)
498
+ static void smsc_destroy(Scorer *self)
572
499
  {
573
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
500
+ Scorer *scorer = SMSc(self)->scorer;
501
+ scorer->destroy(scorer);
502
+ scorer_destroy_i(self);
503
+ }
574
504
 
575
- if (dssc->scorer_queue == NULL) {
576
- dssc_init_scorer_queue(dssc);
577
- }
505
+ static Scorer *single_match_scorer_new(Coordinator *coordinator,
506
+ Scorer *scorer)
507
+ {
508
+ Scorer *self = scorer_new(SingleMatchScorer, scorer->similarity);
509
+ SMSc(self)->coordinator = coordinator;
510
+ SMSc(self)->scorer = scorer;
578
511
 
579
- if (dssc->scorer_queue->count < dssc->min_num_matches) {
580
- return false;
581
- } else {
582
- return dssc_advance_after_current(self);
583
- }
512
+ self->score = &smsc_score;
513
+ self->next = &smsc_next;
514
+ self->skip_to = &smsc_skip_to;
515
+ self->explain = &smsc_explain;
516
+ self->destroy = &smsc_destroy;
517
+ return self;
584
518
  }
585
519
 
586
- bool dssc_skip_to(Scorer *self, int doc_num)
520
+ /***************************************************************************
521
+ * ReqOptSumScorer
522
+ ***************************************************************************/
523
+
524
+ #define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
525
+
526
+ typedef struct ReqOptSumScorer
587
527
  {
588
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
589
- PriorityQueue *scorer_queue = dssc->scorer_queue;
590
- Scorer *top;
528
+ Scorer super;
529
+ Scorer *req_scorer;
530
+ Scorer *opt_scorer;
531
+ bool first_time_opt;
532
+ } ReqOptSumScorer;
591
533
 
592
- if (scorer_queue == NULL) {
593
- dssc_init_scorer_queue(dssc);
594
- scorer_queue = dssc->scorer_queue;
595
- }
534
+ static float rossc_score(Scorer *self)
535
+ {
536
+ ReqOptSumScorer *rossc = ROSSc(self);
537
+ Scorer *req_scorer = rossc->req_scorer;
538
+ Scorer *opt_scorer = rossc->opt_scorer;
539
+ int cur_doc = req_scorer->doc;
540
+ float req_score = req_scorer->score(req_scorer);
596
541
 
597
- if (scorer_queue->count < dssc->min_num_matches) {
598
- return false;
599
- }
600
- if (doc_num <= self->doc) {
601
- doc_num = self->doc + 1;
602
- }
603
- while (true) {
604
- top = pq_top(scorer_queue);
605
- if (top->doc >= doc_num) {
606
- return dssc_advance_after_current(self);
607
- } else if (top->skip_to(top, doc_num)) {
608
- pq_down(scorer_queue);
609
- } else {
610
- pq_pop(scorer_queue);
611
- if (scorer_queue->count < dssc->min_num_matches) {
612
- return false;
613
- }
542
+ if (rossc->first_time_opt) {
543
+ rossc->first_time_opt = false;
544
+ if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
545
+ SCORER_NULLIFY(rossc->opt_scorer);
546
+ return req_score;
547
+ }
548
+ }
549
+ else if (opt_scorer == NULL) {
550
+ return req_score;
614
551
  }
615
- }
552
+ else if ((opt_scorer->doc < cur_doc)
553
+ && ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
554
+ SCORER_NULLIFY(rossc->opt_scorer);
555
+ return req_score;
556
+ }
557
+ /* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
558
+ return (opt_scorer->doc == cur_doc)
559
+ ? req_score + opt_scorer->score(opt_scorer)
560
+ : req_score;
616
561
  }
617
562
 
618
- Explanation *dssc_explain(Scorer *self, int doc_num)
563
+ static bool rossc_next(Scorer *self)
619
564
  {
620
- int i;
621
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
622
- Scorer *sub_scorer;
623
- Explanation *e = expl_create(0.0,
624
- strfmt("At least %d of:", dssc->min_num_matches));
625
- for (i = 0; i < dssc->ss_cnt; i++) {
626
- sub_scorer = dssc->sub_scorers[i];
627
- expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
628
- }
629
- return e;
565
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
566
+ if (req_scorer->next(req_scorer)) {
567
+ self->doc = req_scorer->doc;
568
+ return true;
569
+ }
570
+ return false;
630
571
  }
631
572
 
632
- void dssc_destroy(Scorer *self)
573
+ static bool rossc_skip_to(Scorer *self, int doc_num)
633
574
  {
634
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
635
- int i;
636
- for (i = 0; i < dssc->ss_cnt; i++) {
637
- dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
638
- }
639
- if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
640
- scorer_destroy_i(self);
575
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
576
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
577
+ self->doc = req_scorer->doc;
578
+ return true;
579
+ }
580
+ return false;
641
581
  }
642
582
 
643
- Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
644
- int min_num_matches)
583
+ static Explanation *rossc_explain(Scorer *self, int doc_num)
645
584
  {
646
- Scorer *self = scorer_create(NULL);
647
- DisjunctionSumScorer *dssc = ALLOC(DisjunctionSumScorer);
648
- self->data = dssc;
649
- dssc->ss_cnt = ss_cnt;
650
-
651
- // The document number of the current match.
652
- self->doc = -1;
653
- dssc->cum_score = -1.0;
585
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
586
+ Scorer *opt_scorer = ROSSc(self)->opt_scorer;
654
587
 
655
- // The number of subscorers that provide the current match.
656
- dssc->num_matches = -1;
657
- dssc->coordinator = NULL;
658
-
659
- if (min_num_matches <= 0) {
660
- RAISE(ARG_ERROR, MIN_NUM_MATCHES_ERROR_MSG);
661
- }
662
- if (ss_cnt <= 1) {
663
- RAISE(ARG_ERROR, TWO_SUB_ERROR_MSG);
664
- }
665
-
666
- dssc->min_num_matches = min_num_matches;
667
- dssc->sub_scorers = sub_scorers;
668
-
669
- dssc->scorer_queue = NULL;
670
-
671
- self->score = &dssc_score;
672
- self->next = &dssc_next;
673
- self->skip_to = &dssc_skip_to;
674
- self->explain = &dssc_explain;
675
- self->destroy = &dssc_destroy;
676
-
677
- return self;
588
+ Explanation *e = expl_new(self->score(self),"required, optional:");
589
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
590
+ expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
591
+ return e;
678
592
  }
679
593
 
680
- float cdssc_score(Scorer *self)
594
+ static void rossc_destroy(Scorer *self)
681
595
  {
682
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
683
- dssc->coordinator->num_matches += dssc->num_matches;
684
- return dssc->cum_score;
596
+ ReqOptSumScorer *rossc = ROSSc(self);
597
+ if (rossc->req_scorer) {
598
+ rossc->req_scorer->destroy(rossc->req_scorer);
599
+ }
600
+ if (rossc->opt_scorer) {
601
+ rossc->opt_scorer->destroy(rossc->opt_scorer);
602
+ }
603
+ scorer_destroy_i(self);
685
604
  }
686
605
 
687
- Scorer *counting_disjunction_sum_scorer_create(Coordinator *coordinator,
688
- Scorer **sub_scorers, int ss_cnt, int min_num_matches)
606
+
607
+ static Scorer *req_opt_sum_scorer_new(Scorer *req_scorer, Scorer *opt_scorer)
689
608
  {
690
- Scorer *self = disjunction_sum_scorer_create(
691
- sub_scorers, ss_cnt, min_num_matches);
692
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
693
- dssc->coordinator = coordinator;
694
- self->score = &cdssc_score;
695
- return self;
609
+ Scorer *self = scorer_new(ReqOptSumScorer, NULL);
610
+
611
+ ROSSc(self)->req_scorer = req_scorer;
612
+ ROSSc(self)->opt_scorer = opt_scorer;
613
+ ROSSc(self)->first_time_opt = true;
614
+
615
+ self->score = &rossc_score;
616
+ self->next = &rossc_next;
617
+ self->skip_to = &rossc_skip_to;
618
+ self->explain = &rossc_explain;
619
+ self->destroy = &rossc_destroy;
620
+
621
+ return self;
696
622
  }
697
623
 
698
624
  /***************************************************************************
699
- * ConjunctionScorer
625
+ * ReqExclScorer
700
626
  ***************************************************************************/
701
627
 
702
- void csc_sort_scorers(ConjunctionScorer *csc)
628
+ #define RXSc(scorer) ((ReqExclScorer *)(scorer))
629
+ typedef struct ReqExclScorer
703
630
  {
704
- qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
705
- csc->first = 0;
706
- csc->last = csc->ss_cnt - 1;
707
- }
631
+ Scorer super;
632
+ Scorer *req_scorer;
633
+ Scorer *excl_scorer;
634
+ bool first_time;
635
+ } ReqExclScorer;
708
636
 
709
- void csc_init(Scorer *self, bool init_scorers)
637
+ static bool rxsc_to_non_excluded(Scorer *self)
710
638
  {
711
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
712
- Scorer *sub_scorer;
713
- int i;
639
+ Scorer *req_scorer = RXSc(self)->req_scorer;
640
+ Scorer *excl_scorer = RXSc(self)->excl_scorer;
641
+ int excl_doc = excl_scorer->doc, req_doc;
714
642
 
715
- /* compute coord factor */
716
- csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
717
-
718
- csc->more = (csc->ss_cnt > 0);
643
+ do {
644
+ /* may be excluded */
645
+ req_doc = req_scorer->doc;
646
+ if (req_doc < excl_doc) {
647
+ /* req_scorer advanced to before excl_scorer, ie. not excluded */
648
+ self->doc = req_doc;
649
+ return true;
650
+ }
651
+ else if (req_doc > excl_doc) {
652
+ if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
653
+ /* emptied, no more exclusions */
654
+ SCORER_NULLIFY(RXSc(self)->excl_scorer);
655
+ self->doc = req_doc;
656
+ return true;
657
+ }
658
+ excl_doc = excl_scorer->doc;
659
+ if (excl_doc > req_doc) {
660
+ self->doc = req_doc;
661
+ return true; /* not excluded */
662
+ }
663
+ }
664
+ } while (req_scorer->next(req_scorer));
665
+ /* emptied, nothing left */
666
+ SCORER_NULLIFY(RXSc(self)->req_scorer);
667
+ return false;
668
+ }
719
669
 
720
- if (init_scorers) {
721
- // move each scorer to its first entry
670
+ static bool rxsc_next(Scorer *self)
671
+ {
672
+ ReqExclScorer *rxsc = RXSc(self);
673
+ Scorer *req_scorer = rxsc->req_scorer;
674
+ Scorer *excl_scorer = rxsc->excl_scorer;
722
675
 
723
- for (i = 0; i < csc->ss_cnt; i++) {
724
- sub_scorer = csc->sub_scorers[i];
725
- if (!csc->more) break;
726
- csc->more = sub_scorer->next(sub_scorer);
676
+ if (rxsc->first_time) {
677
+ if (! excl_scorer->next(excl_scorer)) {
678
+ /* emptied at start */
679
+ SCORER_NULLIFY(rxsc->excl_scorer);
680
+ excl_scorer = NULL;
681
+ }
682
+ rxsc->first_time = false;
683
+ }
684
+ if (req_scorer == NULL) {
685
+ return false;
727
686
  }
728
- if (csc->more) csc_sort_scorers(csc);
729
- }
687
+ if (! req_scorer->next(req_scorer)) {
688
+ /* emptied, nothing left */
689
+ SCORER_NULLIFY(rxsc->req_scorer);
690
+ return false;
691
+ }
692
+ if (excl_scorer == NULL) {
693
+ self->doc = req_scorer->doc;
694
+ /* req_scorer->next() already returned true */
695
+ return true;
696
+ }
697
+ return rxsc_to_non_excluded(self);
698
+ }
699
+
700
+ static bool rxsc_skip_to(Scorer *self, int doc_num)
701
+ {
702
+ ReqExclScorer *rxsc = RXSc(self);
703
+ Scorer *req_scorer = rxsc->req_scorer;
704
+ Scorer *excl_scorer = rxsc->excl_scorer;
730
705
 
731
- csc->first_time = false;
706
+ if (rxsc->first_time) {
707
+ rxsc->first_time = false;
708
+ if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
709
+ /* emptied */
710
+ SCORER_NULLIFY(rxsc->excl_scorer);
711
+ excl_scorer = NULL;
712
+ }
713
+ }
714
+ if (req_scorer == NULL) {
715
+ return false;
716
+ }
717
+ if (excl_scorer == NULL) {
718
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
719
+ self->doc = req_scorer->doc;
720
+ return true;
721
+ }
722
+ return false;
723
+ }
724
+ if (! req_scorer->skip_to(req_scorer, doc_num)) {
725
+ SCORER_NULLIFY(rxsc->req_scorer);
726
+ return false;
727
+ }
728
+ return rxsc_to_non_excluded(self);
732
729
  }
733
730
 
734
- float csc_score(Scorer *self)
731
+ static float rxsc_score(Scorer *self)
735
732
  {
736
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
737
- Scorer *sub_scorer;
738
- float score = 0.0; // sum scores
739
- int i;
740
- for (i = 0; i < csc->ss_cnt; i++) {
741
- sub_scorer = csc->sub_scorers[i];
742
- score += sub_scorer->score(sub_scorer);
743
- }
744
- score *= csc->coord;
745
- return score;
733
+ Scorer *req_scorer = RXSc(self)->req_scorer;
734
+ return req_scorer->score(req_scorer);
746
735
  }
747
736
 
748
- bool csc_do_next(Scorer *self)
737
+ static Explanation *rxsc_explain(Scorer *self, int doc_num)
749
738
  {
750
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
751
- Scorer *first = csc->sub_scorers[csc->first];
752
- Scorer *last = csc->sub_scorers[csc->last];
739
+ ReqExclScorer *rxsc = RXSc(self);
740
+ Scorer *req_scorer = rxsc->req_scorer;
741
+ Scorer *excl_scorer = rxsc->excl_scorer;
742
+ Explanation *e;
753
743
 
754
- // find doc w/ all clauses
755
- while (csc->more && (first->doc < last->doc)) {
756
- csc->more = first->skip_to(first, last->doc); // skip first upto last
757
- // move first to last
758
- csc->last = csc->first;
759
- last = first;
760
- csc->first = (csc->first + 1) % csc->ss_cnt;
761
- first = csc->sub_scorers[csc->first];
762
- }
763
- self->doc = first->doc;
764
- return csc->more;
744
+ if (excl_scorer->skip_to(excl_scorer, doc_num)
745
+ && excl_scorer->doc == doc_num) {
746
+ e = expl_new(0.0, "excluded:");
747
+ }
748
+ else {
749
+ e = expl_new(0.0, "not excluded:");
750
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
751
+ }
752
+ return e;
765
753
  }
766
754
 
767
- bool csc_next(Scorer *self)
755
+ static void rxsc_destroy(Scorer *self)
768
756
  {
769
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
770
- Scorer *sub_scorer;
771
- if (csc->first_time) {
772
- csc_init(self, true);
773
- } else if (csc->more) {
774
- sub_scorer = csc->sub_scorers[csc->last];
775
- csc->more = sub_scorer->next(sub_scorer); // trigger further scanning
776
- }
777
- return csc_do_next(self);
757
+ ReqExclScorer *rxsc = RXSc(self);
758
+ if (rxsc->req_scorer) {
759
+ rxsc->req_scorer->destroy(rxsc->req_scorer);
760
+ }
761
+ if (rxsc->excl_scorer) {
762
+ rxsc->excl_scorer->destroy(rxsc->excl_scorer);
763
+ }
764
+ scorer_destroy_i(self);
778
765
  }
779
766
 
780
- bool csc_skip_to(Scorer *self, int doc_num)
767
+ static Scorer *req_excl_scorer_new(Scorer *req_scorer, Scorer *excl_scorer)
781
768
  {
782
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
783
- Scorer *sub_scorer;
784
- int i;
769
+ Scorer *self = scorer_new(ReqExclScorer, NULL);
770
+ RXSc(self)->req_scorer = req_scorer;
771
+ RXSc(self)->excl_scorer = excl_scorer;
772
+ RXSc(self)->first_time = true;
785
773
 
786
- if (csc->first_time) {
787
- csc_init(self, true);
788
- }
774
+ self->score = &rxsc_score;
775
+ self->next = &rxsc_next;
776
+ self->skip_to = &rxsc_skip_to;
777
+ self->explain = &rxsc_explain;
778
+ self->destroy = &rxsc_destroy;
789
779
 
790
- for (i = 0; i < csc->ss_cnt; i++) {
791
- if (!csc->more) break;
792
- sub_scorer = csc->sub_scorers[i];
793
- csc->more = sub_scorer->skip_to(sub_scorer, doc_num);
794
- }
795
- if (csc->more) csc_sort_scorers(csc); // resort the scorers
796
-
797
- return csc_do_next(self);
780
+ return self;
798
781
  }
799
782
 
800
- void csc_destroy(Scorer *self)
783
+ /***************************************************************************
784
+ * NonMatchScorer
785
+ ***************************************************************************/
786
+
787
+ static float nmsc_score(Scorer *self)
801
788
  {
802
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
803
- int i;
804
- for (i = 0; i < csc->ss_cnt; i++) {
805
- csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
806
- }
807
- free(csc->sub_scorers);
808
- scorer_destroy_i(self);
789
+ (void)self;
790
+ return 0.0;
809
791
  }
810
792
 
811
- Scorer *conjunction_scorer_create(Similarity *similarity)
793
+ static bool nmsc_next(Scorer *self)
812
794
  {
813
- Scorer *self = scorer_create(similarity);
814
- ConjunctionScorer *csc = ALLOC(ConjunctionScorer);
815
- ZEROSET(csc, ConjunctionScorer, 1);
816
- self->data = csc;
817
- csc->first_time = true;
818
- csc->more = true;
819
- csc->coordinator = NULL;
820
-
821
- self->score = &csc_score;
822
- self->next = &csc_next;
823
- self->skip_to = &csc_skip_to;
824
- self->destroy = &csc_destroy;
825
-
826
- return self;
795
+ (void)self;
796
+ return false;
827
797
  }
828
798
 
829
- float ccsc_score(Scorer *self)
799
+ static bool nmsc_skip_to(Scorer *self, int doc_num)
830
800
  {
831
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
832
-
833
- int doc;
834
- if ((doc = self->doc) > csc->last_scored_doc) {
835
- csc->last_scored_doc = doc;
836
- csc->coordinator->num_matches += csc->ss_cnt;
837
- }
838
-
839
- return csc_score(self);
801
+ (void)self; (void)doc_num;
802
+ return false;
840
803
  }
841
804
 
842
- Scorer *counting_conjunction_sum_scorer_create(Coordinator *coordinator,
843
- Scorer **sub_scorers, int ss_cnt)
805
+ static Explanation *nmsc_explain(Scorer *self, int doc_num)
844
806
  {
845
- Scorer *self = conjunction_scorer_create(sim_create_default());
846
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
847
- csc->coordinator = coordinator;
848
- csc->last_scored_doc = -1;
849
- csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
850
- memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
851
- csc->ss_capa = csc->ss_cnt = ss_cnt;
807
+ (void)self; (void)doc_num;
808
+ return expl_new(0.0, "No documents matched");
809
+ }
852
810
 
853
- self->score = &ccsc_score;
811
+ static Scorer *non_matching_scorer_new()
812
+ {
813
+ Scorer *self = scorer_new(Scorer, NULL);
814
+ self->score = &nmsc_score;
815
+ self->next = &nmsc_next;
816
+ self->skip_to = &nmsc_skip_to;
817
+ self->explain = &nmsc_explain;
854
818
 
855
- return self;
819
+ return self;
856
820
  }
857
821
 
858
822
  /***************************************************************************
859
- * SingleMatchScorer
823
+ * BooleanScorer
860
824
  ***************************************************************************/
861
825
 
862
- float smsc_score(Scorer *self)
826
+ #define BSc(scorer) ((BooleanScorer *)(scorer))
827
+ typedef struct BooleanScorer
828
+ {
829
+ Scorer super;
830
+ Scorer **required_scorers;
831
+ int rs_cnt;
832
+ int rs_capa;
833
+ Scorer **optional_scorers;
834
+ int os_cnt;
835
+ int os_capa;
836
+ Scorer **prohibited_scorers;
837
+ int ps_cnt;
838
+ int ps_capa;
839
+ Scorer *counting_sum_scorer;
840
+ Coordinator *coordinator;
841
+ } BooleanScorer;
842
+
843
+ static Scorer *counting_sum_scorer_create3(BooleanScorer *bsc,
844
+ Scorer *req_scorer,
845
+ Scorer *opt_scorer)
863
846
  {
864
- SingleMatchScorer *smsc = (SingleMatchScorer *)self->data;
865
- smsc->coordinator->num_matches++;
866
- return smsc->scorer->score(smsc->scorer);
847
+ if (bsc->ps_cnt == 0) {
848
+ /* no prohibited */
849
+ return req_opt_sum_scorer_new(req_scorer, opt_scorer);
850
+ }
851
+ else if (bsc->ps_cnt == 1) {
852
+ /* 1 prohibited */
853
+ return req_opt_sum_scorer_new(
854
+ req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
855
+ opt_scorer);
856
+ }
857
+ else {
858
+ /* more prohibited */
859
+ return req_opt_sum_scorer_new(
860
+ req_excl_scorer_new(
861
+ req_scorer,
862
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
863
+ bsc->ps_cnt, 1)),
864
+ opt_scorer);
865
+ }
867
866
  }
868
867
 
869
- bool smsc_next(Scorer *self)
868
+ static Scorer *counting_sum_scorer_create2(BooleanScorer *bsc,
869
+ Scorer *req_scorer,
870
+ Scorer **optional_scorers,
871
+ int os_cnt)
870
872
  {
871
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
872
- if (scorer->next(scorer)) {
873
- self->doc = scorer->doc;
874
- return true;
875
- }
876
- return false;
873
+ if (os_cnt == 0) {
874
+ if (bsc->ps_cnt == 0) {
875
+ return req_scorer;
876
+ }
877
+ else if (bsc->ps_cnt == 1) {
878
+ return req_excl_scorer_new(req_scorer,
879
+ bsc->prohibited_scorers[0]);
880
+ }
881
+ else {
882
+ /* no optional, more than 1 prohibited */
883
+ return req_excl_scorer_new(
884
+ req_scorer,
885
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
886
+ bsc->ps_cnt, 1));
887
+ }
888
+ }
889
+ else if (os_cnt == 1) {
890
+ return counting_sum_scorer_create3(
891
+ bsc,
892
+ req_scorer,
893
+ single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
894
+ }
895
+ else {
896
+ /* more optional */
897
+ return counting_sum_scorer_create3(
898
+ bsc,
899
+ req_scorer,
900
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
901
+ optional_scorers, os_cnt, 1));
902
+ }
877
903
  }
878
904
 
879
- bool smsc_skip_to(Scorer *self, int doc_num)
905
+ static Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
880
906
  {
881
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
882
- if (scorer->skip_to(scorer, doc_num)) {
883
- self->doc = scorer->doc;
884
- return true;
885
- }
886
- return false;
907
+ if (bsc->rs_cnt == 0) {
908
+ if (bsc->os_cnt == 0) {
909
+ int i;
910
+ /* only prohibited scorers so return non_matching scorer */
911
+ for (i = 0; i < bsc->ps_cnt; i++) {
912
+ bsc->prohibited_scorers[i]->destroy(
913
+ bsc->prohibited_scorers[i]);
914
+ }
915
+ return non_matching_scorer_new();
916
+ }
917
+ else if (bsc->os_cnt == 1) {
918
+ /* the only optional scorer is required */
919
+ return counting_sum_scorer_create2(
920
+ bsc,
921
+ single_match_scorer_new(bsc->coordinator,
922
+ bsc->optional_scorers[0]),
923
+ NULL, 0); /* no optional scorers left */
924
+ }
925
+ else {
926
+ /* more than 1 optional_scorers, no required scorers */
927
+ return counting_sum_scorer_create2(
928
+ bsc,
929
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
930
+ bsc->optional_scorers,
931
+ bsc->os_cnt, 1),
932
+ NULL, 0); /* no optional scorers left */
933
+ }
934
+ }
935
+ else if (bsc->rs_cnt == 1) {
936
+ /* 1 required */
937
+ return counting_sum_scorer_create2(
938
+ bsc,
939
+ single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
940
+ bsc->optional_scorers, bsc->os_cnt);
941
+ }
942
+ else {
943
+ /* more required scorers */
944
+ return counting_sum_scorer_create2(
945
+ bsc,
946
+ counting_conjunction_sum_scorer_new(bsc->coordinator,
947
+ bsc->required_scorers,
948
+ bsc->rs_cnt),
949
+ bsc->optional_scorers, bsc->os_cnt);
950
+ }
887
951
  }
888
952
 
889
- Explanation *smsc_explain(Scorer *self, int doc_num)
953
+ static Scorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
890
954
  {
891
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
892
- return scorer->explain(scorer, doc_num);
955
+ coord_init(bsc->coordinator);
956
+ return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
893
957
  }
894
958
 
895
- void smsc_destroy(Scorer *self)
959
+ static void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
896
960
  {
897
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
898
- scorer->destroy(scorer);
899
- scorer_destroy_i(self);
961
+ BooleanScorer *bsc = BSc(self);
962
+ if (occur != BC_MUST_NOT) {
963
+ bsc->coordinator->max_coord++;
964
+ }
965
+
966
+ switch (occur) {
967
+ case BC_MUST:
968
+ RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
969
+ bsc->required_scorers[bsc->rs_cnt++] = scorer;
970
+ break;
971
+ case BC_SHOULD:
972
+ RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
973
+ bsc->optional_scorers[bsc->os_cnt++] = scorer;
974
+ break;
975
+ case BC_MUST_NOT:
976
+ RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
977
+ bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
978
+ break;
979
+ default:
980
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
981
+ ":must_not instead");
982
+ }
900
983
  }
901
984
 
902
- Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
985
+ static float bsc_score(Scorer *self)
903
986
  {
904
- Scorer *self = scorer_create(scorer->similarity);
905
- SingleMatchScorer *smsc = ALLOC(SingleMatchScorer);
906
- smsc->coordinator = coordinator;
907
- smsc->scorer = scorer;
908
- self->data = smsc;
909
-
910
- self->score = &smsc_score;
911
- self->next = &smsc_next;
912
- self->skip_to = &smsc_skip_to;
913
- self->explain = &smsc_explain;
914
- self->destroy = &smsc_destroy;
915
- return self;
987
+ BooleanScorer *bsc = BSc(self);
988
+ Coordinator *coord = bsc->coordinator;
989
+ float sum;
990
+ coord->num_matches = 0;
991
+ sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
992
+ return sum * coord->coord_factors[coord->num_matches];
916
993
  }
917
994
 
918
- /***************************************************************************
919
- * ReqOptSumScorer
920
- ***************************************************************************/
995
+ static bool bsc_next(Scorer *self)
996
+ {
997
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
921
998
 
922
- float rossc_score(Scorer *self)
923
- {
924
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
925
- Scorer *req_scorer = rossc->req_scorer;
926
- Scorer *opt_scorer = rossc->opt_scorer;
927
- int cur_doc = req_scorer->doc;
928
- float req_score = req_scorer->score(req_scorer);
929
-
930
- if (rossc->first_time_opt) {
931
- rossc->first_time_opt = false;
932
- if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
933
- SCORER_NULLIFY(rossc->opt_scorer);
934
- return req_score;
935
- }
936
- } else if (opt_scorer == NULL) {
937
- return req_score;
938
- } else if ((opt_scorer->doc < cur_doc) &&
939
- ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
940
- SCORER_NULLIFY(rossc->opt_scorer);
941
- return req_score;
942
- }
943
- // assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc)
944
- return (opt_scorer->doc == cur_doc)
945
- ? req_score + opt_scorer->score(opt_scorer)
946
- : req_score;
947
- }
948
-
949
- bool rossc_next(Scorer *self)
950
- {
951
- Scorer *req_scorer = ((ReqOptSumScorer *)self->data)->req_scorer;
952
- if (req_scorer->next(req_scorer)) {
953
- self->doc = req_scorer->doc;
954
- return true;
955
- }
956
- return false;
999
+ if (!cnt_sum_sc) {
1000
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1001
+ }
1002
+ if (cnt_sum_sc->next(cnt_sum_sc)) {
1003
+ self->doc = cnt_sum_sc->doc;
1004
+ return true;
1005
+ }
1006
+ else {
1007
+ return false;
1008
+ }
957
1009
  }
958
1010
 
959
- bool rossc_skip_to(Scorer *self, int doc_num)
1011
+ static bool bsc_skip_to(Scorer *self, int doc_num)
960
1012
  {
961
- Scorer *req_scorer = ((ReqOptSumScorer *)self->data)->req_scorer;
962
- if (req_scorer->skip_to(req_scorer, doc_num)) {
963
- self->doc = req_scorer->doc;
964
- return true;
965
- }
966
- return false;
1013
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1014
+
1015
+ if (!BSc(self)->counting_sum_scorer) {
1016
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1017
+ }
1018
+ if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
1019
+ self->doc = cnt_sum_sc->doc;
1020
+ return true;
1021
+ }
1022
+ else {
1023
+ return false;
1024
+ }
967
1025
  }
968
1026
 
969
- Explanation *rossc_explain(Scorer *self, int doc_num)
1027
+ static void bsc_destroy(Scorer *self)
970
1028
  {
971
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
972
- Scorer *req_scorer = rossc->req_scorer;
973
- Scorer *opt_scorer = rossc->opt_scorer;
1029
+ BooleanScorer *bsc = BSc(self);
1030
+ Coordinator *coord = bsc->coordinator;
1031
+
1032
+ free(coord->coord_factors);
1033
+ free(coord);
1034
+
1035
+ if (bsc->counting_sum_scorer) {
1036
+ bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1037
+ }
1038
+ else {
1039
+ int i;
1040
+ for (i = 0; i < bsc->rs_cnt; i++) {
1041
+ bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1042
+ }
1043
+
1044
+ for (i = 0; i < bsc->os_cnt; i++) {
1045
+ bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1046
+ }
974
1047
 
975
- Explanation *e = expl_create(self->score(self), estrdup("required, optional:"));
976
- expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
977
- expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
978
- return e;
1048
+ for (i = 0; i < bsc->ps_cnt; i++) {
1049
+ bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1050
+ }
1051
+ }
1052
+ free(bsc->required_scorers);
1053
+ free(bsc->optional_scorers);
1054
+ free(bsc->prohibited_scorers);
1055
+ scorer_destroy_i(self);
979
1056
  }
980
1057
 
981
- void rossc_destroy(Scorer *self)
1058
+ static Explanation *bsc_explain(Scorer *self, int doc_num)
982
1059
  {
983
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
984
- if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
985
- if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
986
- scorer_destroy_i(self);
1060
+ (void)self; (void)doc_num;
1061
+ return expl_new(0.0, "This explanation is not supported");
987
1062
  }
988
1063
 
989
-
990
- Scorer *req_opt_sum_scorer_create(Scorer *req_scorer, Scorer *opt_scorer)
1064
+ static Scorer *bsc_new(Similarity *similarity)
991
1065
  {
992
- Scorer *self = scorer_create(NULL);
993
- ReqOptSumScorer *rossc = ALLOC(ReqOptSumScorer);
994
- self->data = rossc;
995
- rossc->req_scorer = req_scorer;
996
- rossc->opt_scorer = opt_scorer;
997
- rossc->first_time_opt = true;
998
-
999
- self->score = &rossc_score;
1000
- self->next = &rossc_next;
1001
- self->skip_to = &rossc_skip_to;
1002
- self->explain = &rossc_explain;
1003
- self->destroy = &rossc_destroy;
1066
+ Scorer *self = scorer_new(BooleanScorer, similarity);
1067
+ BSc(self)->coordinator = coord_new(similarity);
1068
+ BSc(self)->counting_sum_scorer = NULL;
1004
1069
 
1005
- return self;
1070
+ self->score = &bsc_score;
1071
+ self->next = &bsc_next;
1072
+ self->skip_to = &bsc_skip_to;
1073
+ self->explain = &bsc_explain;
1074
+ self->destroy = &bsc_destroy;
1075
+ return self;
1006
1076
  }
1007
1077
 
1008
1078
  /***************************************************************************
1009
- * ReqExclScorer
1079
+ *
1080
+ * BooleanWeight
1081
+ *
1010
1082
  ***************************************************************************/
1011
1083
 
1012
- bool rxsc_to_non_excluded(Scorer *self)
1013
- {
1014
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1015
- Scorer *req_scorer = rxsc->req_scorer;
1016
- Scorer *excl_scorer = rxsc->excl_scorer;
1017
- int excl_doc = excl_scorer->doc, req_doc;
1018
-
1019
- do {
1020
- req_doc = req_scorer->doc; // may be excluded
1021
- if (req_doc < excl_doc) {
1022
- // req_scorer advanced to before excl_scorer, ie. not excluded
1023
- self->doc = req_doc;
1024
- return true;
1025
- } else if (req_doc > excl_doc) {
1026
- if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
1027
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted, no more exclusions
1028
- self->doc = req_doc;
1029
- return true;
1030
- }
1031
- excl_doc = excl_scorer->doc;
1032
- if (excl_doc > req_doc) {
1033
- self->doc = req_doc;
1034
- return true; // not excluded
1035
- }
1084
+ typedef struct BooleanWeight
1085
+ {
1086
+ Weight w;
1087
+ Weight **weights;
1088
+ int w_cnt;
1089
+ } BooleanWeight;
1090
+
1091
+
1092
+ static float bw_sum_of_squared_weights(Weight *self)
1093
+ {
1094
+ BooleanQuery *bq = BQ(self->query);
1095
+ float sum = 0.0;
1096
+ int i;
1097
+
1098
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1099
+ if (! bq->clauses[i]->is_prohibited) {
1100
+ Weight *weight = BW(self)->weights[i];
1101
+ /* sum sub-weights */
1102
+ sum += weight->sum_of_squared_weights(weight);
1103
+ }
1036
1104
  }
1037
- } while (req_scorer->next(req_scorer));
1038
- SCORER_NULLIFY(rxsc->req_scorer); // exhausted, nothing left
1039
- return false;
1105
+
1106
+ /* boost each sub-weight */
1107
+ sum *= self->value * self->value;
1108
+ return sum;
1040
1109
  }
1041
1110
 
1042
- bool rxsc_next(Scorer *self)
1111
+ static void bw_normalize(Weight *self, float normalization_factor)
1043
1112
  {
1044
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1045
- Scorer *req_scorer = rxsc->req_scorer;
1046
- Scorer *excl_scorer = rxsc->excl_scorer;
1113
+ BooleanQuery *bq = BQ(self->query);
1114
+ int i;
1047
1115
 
1048
- if (rxsc->first_time) {
1049
- if (! excl_scorer->next(excl_scorer)) {
1050
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted at start
1051
- excl_scorer = NULL;
1116
+ normalization_factor *= self->value; /* multiply by query boost */
1117
+
1118
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1119
+ if (! bq->clauses[i]->is_prohibited) {
1120
+ Weight *weight = BW(self)->weights[i];
1121
+ /* sum sub-weights */
1122
+ weight->normalize(weight, normalization_factor);
1123
+ }
1052
1124
  }
1053
- rxsc->first_time = false;
1054
- }
1055
- if (req_scorer == NULL) {
1056
- return false;
1057
- }
1058
- if (! req_scorer->next(req_scorer)) {
1059
- SCORER_NULLIFY(rxsc->req_scorer); // exhausted, nothing left
1060
- return false;
1061
- }
1062
- if (excl_scorer == NULL) {
1063
- self->doc = req_scorer->doc;
1064
- return true; // req_scorer->next() already returned true
1065
- }
1066
- return rxsc_to_non_excluded(self);
1067
1125
  }
1068
1126
 
1069
- bool rxsc_skip_to(Scorer *self, int doc_num)
1127
+ static Scorer *bw_scorer(Weight *self, IndexReader *ir)
1070
1128
  {
1071
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1072
- Scorer *req_scorer = rxsc->req_scorer;
1073
- Scorer *excl_scorer = rxsc->excl_scorer;
1129
+ Scorer *bsc = bsc_new(self->similarity);
1130
+ BooleanQuery *bq = BQ(self->query);
1131
+ int i;
1074
1132
 
1075
- if (rxsc->first_time) {
1076
- rxsc->first_time = false;
1077
- if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
1078
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted
1079
- excl_scorer = NULL;
1080
- }
1081
- }
1082
- if (req_scorer == NULL) {
1083
- return false;
1084
- }
1085
- if (excl_scorer == NULL) {
1086
- if (req_scorer->skip_to(req_scorer, doc_num)) {
1087
- self->doc = req_scorer->doc;
1088
- return true;
1133
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1134
+ BooleanClause *clause = bq->clauses[i];
1135
+ Weight *weight = BW(self)->weights[i];
1136
+ Scorer *sub_scorer = weight->scorer(weight, ir);
1137
+ if (sub_scorer) {
1138
+ bsc_add_scorer(bsc, sub_scorer, clause->occur);
1139
+ }
1140
+ else if (clause->is_required) {
1141
+ bsc->destroy(bsc);
1142
+ return NULL;
1143
+ }
1089
1144
  }
1090
- return false;
1091
- }
1092
- if (! req_scorer->skip_to(req_scorer, doc_num)) {
1093
- SCORER_NULLIFY(rxsc->req_scorer);
1094
- return false;
1095
- }
1096
- return rxsc_to_non_excluded(self);
1145
+
1146
+ return bsc;
1097
1147
  }
1098
1148
 
1099
- float rxsc_score(Scorer *self)
1149
+ static char *bw_to_s(Weight *self)
1100
1150
  {
1101
- Scorer *req_scorer = ((ReqExclScorer *)self->data)->req_scorer;
1102
- return req_scorer->score(req_scorer);
1151
+ return strfmt("BooleanWeight(%f)", self->value);
1103
1152
  }
1104
1153
 
1105
- Explanation *rxsc_explain(Scorer *self, int doc_num)
1154
+ static void bw_destroy(Weight *self)
1106
1155
  {
1107
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1108
- Scorer *req_scorer = rxsc->req_scorer;
1109
- Scorer *excl_scorer = rxsc->excl_scorer;
1156
+ int i;
1110
1157
 
1111
- Explanation *e;
1112
- if (excl_scorer->skip_to(excl_scorer, doc_num) && excl_scorer->doc == doc_num) {
1113
- e = expl_create(0.0, estrdup("excluded:"));
1114
- } else {
1115
- e = expl_create(0.0, estrdup("not excluded:"));
1116
- expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
1117
- }
1118
- return e;
1158
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1159
+ BW(self)->weights[i]->destroy(BW(self)->weights[i]);
1160
+ }
1161
+
1162
+ free(BW(self)->weights);
1163
+ w_destroy(self);
1119
1164
  }
1120
1165
 
1121
- void rxsc_destroy(Scorer *self)
1166
+ static Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
1122
1167
  {
1123
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1124
- if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
1125
- if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
1126
- scorer_destroy_i(self);
1168
+ BooleanQuery *bq = BQ(self->query);
1169
+ Explanation *sum_expl = expl_new(0.0, "sum of:");
1170
+ Explanation *explanation;
1171
+ int coord = 0;
1172
+ int max_coord = 0;
1173
+ float coord_factor = 0.0;
1174
+ float sum = 0.0;
1175
+ int i;
1176
+
1177
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1178
+ Weight *weight = BW(self)->weights[i];
1179
+ BooleanClause *clause = bq->clauses[i];
1180
+ explanation = weight->explain(weight, ir, doc_num);
1181
+ if (!clause->is_prohibited) {
1182
+ max_coord++;
1183
+ }
1184
+ if (explanation->value > 0.0) {
1185
+ if (!clause->is_prohibited) {
1186
+ expl_add_detail(sum_expl, explanation);
1187
+ sum += explanation->value;
1188
+ coord++;
1189
+ }
1190
+ else {
1191
+ expl_destroy(explanation);
1192
+ expl_destroy(sum_expl);
1193
+ return expl_new(0.0, "match prohibited");
1194
+ }
1195
+ }
1196
+ else if (clause->is_required) {
1197
+ expl_destroy(explanation);
1198
+ expl_destroy(sum_expl);
1199
+ return expl_new(0.0, "match required");
1200
+ }
1201
+ else {
1202
+ expl_destroy(explanation);
1203
+ }
1204
+ }
1205
+ sum_expl->value = sum;
1206
+
1207
+ if (coord == 1) { /* only one clause matched */
1208
+ explanation = sum_expl; /* eliminate wrapper */
1209
+ ary_size(sum_expl->details) = 0;
1210
+ sum_expl = sum_expl->details[0];
1211
+ expl_destroy(explanation);
1212
+ }
1213
+
1214
+ coord_factor = sim_coord(self->similarity, coord, max_coord);
1215
+
1216
+ if (coord_factor == 1.0) { /* coord is no-op */
1217
+ return sum_expl; /* eliminate wrapper */
1218
+ }
1219
+ else {
1220
+ explanation = expl_new(sum * coord_factor, "product of:");
1221
+ expl_add_detail(explanation, sum_expl);
1222
+ expl_add_detail(explanation, expl_new(coord_factor, "coord(%d/%d)",
1223
+ coord, max_coord));
1224
+ return explanation;
1225
+ }
1127
1226
  }
1128
1227
 
1129
- Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
1228
+ static Weight *bw_new(Query *query, Searcher *searcher)
1130
1229
  {
1131
- Scorer *self = scorer_create(NULL);
1132
- ReqExclScorer *rxsc = ALLOC(ReqExclScorer);
1133
- self->data = rxsc;
1134
- rxsc->req_scorer = req_scorer;
1135
- rxsc->excl_scorer = excl_scorer;
1136
- rxsc->first_time = true;
1137
-
1138
- self->score = &rxsc_score;
1139
- self->next = &rxsc_next;
1140
- self->skip_to = &rxsc_skip_to;
1141
- self->explain = &rxsc_explain;
1142
- self->destroy = &rxsc_destroy;
1230
+ int i;
1231
+ Weight *self = w_new(BooleanWeight, query);
1232
+
1233
+ BW(self)->w_cnt = BQ(query)->clause_cnt;
1234
+ BW(self)->weights = ALLOC_N(Weight *, BW(self)->w_cnt);
1235
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1236
+ BW(self)->weights[i] = q_weight(BQ(query)->clauses[i]->query, searcher);
1237
+ }
1143
1238
 
1144
- return self;
1239
+ self->normalize = &bw_normalize;
1240
+ self->scorer = &bw_scorer;
1241
+ self->explain = &bw_explain;
1242
+ self->to_s = &bw_to_s;
1243
+ self->destroy = &bw_destroy;
1244
+ self->sum_of_squared_weights = &bw_sum_of_squared_weights;
1245
+
1246
+ self->similarity = query->get_similarity(query, searcher);
1247
+ self->value = query->boost;
1248
+
1249
+ return self;
1145
1250
  }
1146
1251
 
1147
1252
  /***************************************************************************
1148
- * NonMatchScorer
1253
+ *
1254
+ * BooleanClause
1255
+ *
1149
1256
  ***************************************************************************/
1150
1257
 
1151
- float nmsc_score(Scorer *self)
1152
- {
1153
- return 0.0;
1258
+ void bc_set_occur(BooleanClause *self, enum BC_TYPE occur)
1259
+ {
1260
+ self->occur = occur;
1261
+ switch (occur) {
1262
+ case BC_SHOULD:
1263
+ self->is_prohibited = false;
1264
+ self->is_required = false;
1265
+ break;
1266
+ case BC_MUST:
1267
+ self->is_prohibited = false;
1268
+ self->is_required = true;
1269
+ break;
1270
+ case BC_MUST_NOT:
1271
+ self->is_prohibited = true;
1272
+ self->is_required = false;
1273
+ break;
1274
+ default:
1275
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
1276
+ ":must or :must_not instead");
1277
+ }
1154
1278
  }
1155
1279
 
1156
- bool nmsc_next(Scorer *self)
1280
+ void bc_deref(BooleanClause *self)
1157
1281
  {
1158
- return false;
1282
+ if (--self->ref_cnt <= 0) {
1283
+ q_deref(self->query);
1284
+ free(self);
1285
+ }
1159
1286
  }
1160
1287
 
1161
- bool nmsc_skip_to(Scorer *self, int doc_num)
1288
+ static ulong bc_hash(BooleanClause *self)
1162
1289
  {
1163
- return false;
1290
+ return ((q_hash(self->query) << 2) | self->occur);
1164
1291
  }
1165
1292
 
1166
- Explanation *nmsc_explain(Scorer *self, int doc_num)
1293
+ static int bc_eq(BooleanClause *self, BooleanClause *o)
1167
1294
  {
1168
- return expl_create(0.0, estrdup("No documents matched"));
1295
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
1169
1296
  }
1170
1297
 
1171
- Scorer *non_matching_scorer_create()
1298
+ BooleanClause *bc_new(Query *query, enum BC_TYPE occur)
1172
1299
  {
1173
- Scorer *self = scorer_create(NULL);
1174
- self->score = &nmsc_score;
1175
- self->next = &nmsc_next;
1176
- self->skip_to = &nmsc_skip_to;
1177
- self->explain = &nmsc_explain;
1178
-
1179
- return self;
1300
+ BooleanClause *self = ALLOC(BooleanClause);
1301
+ self->ref_cnt = 1;
1302
+ self->query = query;
1303
+ bc_set_occur(self, occur);
1304
+ return self;
1180
1305
  }
1181
1306
 
1182
-
1183
1307
  /***************************************************************************
1184
- * BooleanScorer
1308
+ *
1309
+ * BooleanQuery
1310
+ *
1185
1311
  ***************************************************************************/
1186
1312
 
1187
- Scorer *counting_sum_scorer_create3(BooleanScorer *bsc, Scorer *req_scorer,
1188
- Scorer *opt_scorer)
1313
+ static MatchVector *bq_get_matchv_i(Query *self, MatchVector *mv,
1314
+ TermVector *tv)
1189
1315
  {
1190
- if (bsc->ps_cnt == 0) { // no prohibited
1191
- return req_opt_sum_scorer_create(req_scorer, opt_scorer);
1192
- } else if (bsc->ps_cnt == 1) { // 1 prohibited
1193
- return req_opt_sum_scorer_create(
1194
- req_excl_scorer_create(req_scorer, bsc->prohibited_scorers[0]),
1195
- opt_scorer);
1196
- } else { // more prohibited
1197
- return req_opt_sum_scorer_create(
1198
- req_excl_scorer_create(req_scorer,
1199
- disjunction_sum_scorer_create(bsc->prohibited_scorers, bsc->ps_cnt, 1)),
1200
- opt_scorer);
1201
- }
1316
+ int i;
1317
+ for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
1318
+ if (BQ(self)->clauses[i]->occur != BC_MUST_NOT) {
1319
+ Query *q = BQ(self)->clauses[i]->query;
1320
+ q->get_matchv_i(q, mv, tv);
1321
+ }
1322
+ }
1323
+ return mv;
1202
1324
  }
1203
1325
 
1204
- Scorer *counting_sum_scorer_create2(BooleanScorer *bsc, Scorer *req_scorer,
1205
- Scorer **optional_scorers, int os_cnt)
1326
+ static Query *bq_rewrite(Query *self, IndexReader *ir)
1206
1327
  {
1207
- if (os_cnt == 0) {
1208
- if (bsc->ps_cnt == 0) {
1209
- return req_scorer;
1210
- } else if (bsc->ps_cnt == 1) {
1211
- return req_excl_scorer_create(req_scorer,
1212
- bsc->prohibited_scorers[0]);
1213
- } else { // no optional, more than 1 prohibited
1214
- return req_excl_scorer_create(req_scorer,
1215
- disjunction_sum_scorer_create(bsc->prohibited_scorers, bsc->ps_cnt, 1));
1216
- }
1217
- } else if (os_cnt == 1) {
1218
- return counting_sum_scorer_create3(
1219
- bsc,
1220
- req_scorer,
1221
- single_match_scorer_create(bsc->coordinator, optional_scorers[0]));
1222
- } else { // more optional
1223
- return counting_sum_scorer_create3(
1224
- bsc,
1225
- req_scorer,
1226
- counting_disjunction_sum_scorer_create(bsc->coordinator,
1227
- optional_scorers, os_cnt, 1));
1228
- }
1229
- }
1230
-
1231
- Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
1232
- {
1233
- if (bsc->rs_cnt == 0) {
1234
- if (bsc->os_cnt == 0) {
1235
- int i;
1236
- // only prohibited_scorers so free them and return non_matching scorer
1237
- for (i = 0; i < bsc->ps_cnt; i++) {
1238
- bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1239
- }
1240
- return non_matching_scorer_create();
1241
- } else if (bsc->os_cnt == 1) {
1242
- return counting_sum_scorer_create2( // the only optional scorer is required
1243
- bsc,
1244
- single_match_scorer_create(bsc->coordinator, bsc->optional_scorers[0]),
1245
- NULL, 0); // no optional scorers left
1246
- } else { // more than 1 @optional_scorers, no required scorers
1247
- return counting_sum_scorer_create2( // at least one optional scorer is required
1248
- bsc,
1249
- counting_disjunction_sum_scorer_create(bsc->coordinator,
1250
- bsc->optional_scorers, bsc->os_cnt, 1),
1251
- NULL, 0); // no optional scorers left
1252
- }
1253
- } else if (bsc->rs_cnt == 1) { // 1 required
1254
- return counting_sum_scorer_create2(
1255
- bsc,
1256
- single_match_scorer_create(bsc->coordinator, bsc->required_scorers[0]),
1257
- bsc->optional_scorers, bsc->os_cnt);
1258
- } else {// more required scorers
1259
- return counting_sum_scorer_create2(
1260
- bsc,
1261
- counting_conjunction_sum_scorer_create(bsc->coordinator,
1262
- bsc->required_scorers, bsc->rs_cnt),
1263
- bsc->optional_scorers, bsc->os_cnt);
1264
- }
1265
- }
1266
-
1267
- void bsc_init_counting_sum_scorer(BooleanScorer *bsc)
1268
- {
1269
- coord_init(bsc->coordinator);
1270
- bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
1271
- }
1272
-
1273
- void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
1274
- {
1275
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1276
- if (occur != BC_MUST_NOT) {
1277
- bsc->coordinator->max_coord++;
1278
- }
1279
-
1280
- switch (occur) {
1281
- case BC_MUST:
1282
- RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
1283
- bsc->required_scorers[bsc->rs_cnt++] = scorer;
1284
- break;
1285
- case BC_SHOULD:
1286
- RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
1287
- bsc->optional_scorers[bsc->os_cnt++] = scorer;
1288
- break;
1289
- case BC_MUST_NOT:
1290
- RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
1291
- bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
1292
- break;
1293
- default:
1294
- RAISE(ARG_ERROR, UNKNOWN_OCCUR_VAL_ERROR_MSG);
1295
- }
1296
- }
1297
-
1298
- float bsc_score(Scorer *self)
1299
- {
1300
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1301
- Coordinator *coord = bsc->coordinator;
1302
- float sum;
1303
- coord->num_matches = 0;
1304
- sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
1305
- return sum * coord->coord_factors[coord->num_matches];
1306
- }
1307
-
1308
- bool bsc_next(Scorer *self)
1309
- {
1310
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1311
-
1312
- if (!bsc->counting_sum_scorer) {
1313
- bsc_init_counting_sum_scorer(bsc);
1314
- }
1315
- if (bsc->counting_sum_scorer->next(bsc->counting_sum_scorer)) {
1316
- self->doc = bsc->counting_sum_scorer->doc;
1317
- return true;
1318
- } else {
1319
- return false;
1320
- }
1328
+ int i;
1329
+
1330
+ bool rewritten = false;
1331
+
1332
+ if (BQ(self)->clause_cnt == 1) {
1333
+ /* optimize 1-clause queries */
1334
+ BooleanClause *clause = BQ(self)->clauses[0];
1335
+ if (! clause->is_prohibited) {
1336
+ /* just return clause. Re-write first. */
1337
+ Query *q = clause->query->rewrite(clause->query, ir);
1338
+
1339
+ if (self->boost != 1.0) {
1340
+ /* original_boost is initialized to 0.0. If it has been set to
1341
+ * something else it means this query has already been boosted
1342
+ * before so boost from the original value */
1343
+ if ((q == clause->query) && BQ(self)->original_boost) {
1344
+ /* rewrite was no-op */
1345
+ q->boost = BQ(self)->original_boost * self->boost;
1346
+ }
1347
+ else {
1348
+ /* save original boost in case query is rewritten again */
1349
+ BQ(self)->original_boost = q->boost;
1350
+ q->boost *= self->boost;
1351
+ }
1352
+ }
1353
+
1354
+ return q;
1355
+ }
1356
+ }
1357
+
1358
+ self->ref_cnt++;
1359
+ /* replace each clause's query with its rewritten query */
1360
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1361
+ BooleanClause *clause = BQ(self)->clauses[i];
1362
+ Query *rq = clause->query->rewrite(clause->query, ir);
1363
+ if (rq != clause->query) {
1364
+ if (!rewritten) {
1365
+ int j;
1366
+ Query *new_self = q_new(BooleanQuery);
1367
+ memcpy(new_self, self, sizeof(BooleanQuery));
1368
+ BQ(new_self)->clauses = ALLOC_N(BooleanClause *, BQ(self)->clause_capa);
1369
+ memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
1370
+ BQ(self)->clause_capa * sizeof(BooleanClause *));
1371
+ for (j = 0; j < BQ(self)->clause_cnt; j++) {
1372
+ REF(BQ(self)->clauses[j]);
1373
+ }
1374
+ self->ref_cnt--;
1375
+ self = new_self;
1376
+ self->ref_cnt = 1;
1377
+ rewritten = true;
1378
+ }
1379
+ DEREF(clause);
1380
+ BQ(self)->clauses[i] = bc_new(rq, clause->occur);
1381
+ } else {
1382
+ DEREF(rq);
1383
+ }
1384
+ }
1385
+
1386
+ return self;
1387
+ }
1388
+
1389
+ static void bq_extract_terms(Query *self, HashSet *terms)
1390
+ {
1391
+ int i;
1392
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1393
+ BooleanClause *clause = BQ(self)->clauses[i];
1394
+ clause->query->extract_terms(clause->query, terms);
1395
+ }
1321
1396
  }
1322
1397
 
1323
- bool bsc_skip_to(Scorer *self, int doc_num)
1398
+ static char *bq_to_s(Query *self, const char *field)
1324
1399
  {
1325
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1400
+ int i;
1401
+ BooleanClause *clause;
1402
+ Query *sub_query;
1403
+ char *buffer;
1404
+ char *clause_str;
1405
+ int bp = 0;
1406
+ int size = QUERY_STRING_START_SIZE;
1407
+ int needed;
1408
+ int clause_len;
1409
+
1410
+ buffer = ALLOC_N(char, size);
1411
+ if (self->boost != 1.0) {
1412
+ buffer[0] = '(';
1413
+ bp++;
1414
+ }
1326
1415
 
1327
- if (!bsc->counting_sum_scorer) {
1328
- bsc_init_counting_sum_scorer(bsc);
1329
- }
1330
- if (bsc->counting_sum_scorer->skip_to(bsc->counting_sum_scorer, doc_num)) {
1331
- self->doc = bsc->counting_sum_scorer->doc;
1332
- return true;
1333
- } else {
1334
- return false;
1335
- }
1416
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1417
+ clause = BQ(self)->clauses[i];
1418
+ clause_str = clause->query->to_s(clause->query, field);
1419
+ clause_len = (int)strlen(clause_str);
1420
+ needed = clause_len + 5;
1421
+ while ((size - bp) < needed) {
1422
+ size *= 2;
1423
+ REALLOC_N(buffer, char, size);
1424
+ }
1425
+
1426
+ if (i > 0) {
1427
+ buffer[bp++] = ' ';
1428
+ }
1429
+ if (clause->is_prohibited) {
1430
+ buffer[bp++] = '-';
1431
+ }
1432
+ else if (clause->is_required) {
1433
+ buffer[bp++] = '+';
1434
+ }
1435
+
1436
+ sub_query = clause->query;
1437
+ if (sub_query->type == BOOLEAN_QUERY) {
1438
+ /* wrap sub-bools in parens */
1439
+ buffer[bp++] = '(';
1440
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1441
+ bp += clause_len;
1442
+ buffer[bp++] = ')';
1443
+ }
1444
+ else {
1445
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1446
+ bp += clause_len;
1447
+ }
1448
+ free(clause_str);
1449
+ }
1450
+
1451
+ if (self->boost != 1.0) {
1452
+ char *boost_str = strfmt(")^%f", self->boost);
1453
+ int boost_len = (int)strlen(boost_str);
1454
+ REALLOC_N(buffer, char, bp + boost_len + 1);
1455
+ memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
1456
+ bp += boost_len;
1457
+ free(boost_str);
1458
+ }
1459
+ buffer[bp] = 0;
1460
+ return buffer;
1461
+ }
1462
+
1463
+ static void bq_destroy(Query *self)
1464
+ {
1465
+ int i;
1466
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1467
+ bc_deref(BQ(self)->clauses[i]);
1468
+ }
1469
+ free(BQ(self)->clauses);
1470
+ if (BQ(self)->similarity) {
1471
+ BQ(self)->similarity->destroy(BQ(self)->similarity);
1472
+ }
1473
+ q_destroy_i(self);
1474
+ }
1475
+
1476
+ static float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap) /* Coord callback that ignores all of its arguments and always returns 1.0. */
1477
+ {
1478
+ (void)sim; (void)overlap; (void)max_overlap; /* mark the parameters as intentionally unused */
1479
+ return 1.0;
1336
1480
  }
1337
1481
 
1338
- void bsc_destroy(Scorer *self)
1482
+ static Similarity *bq_get_similarity(Query *self, Searcher *searcher) /* Returns the query's cached similarity, building it on first use with coord replaced by bq_coord_disabled. */
1339
1483
  {
1340
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1341
- Coordinator *coord = bsc->coordinator;
1484
+ if (!BQ(self)->similarity) { /* first call: nothing cached yet */
1485
+ Similarity *sim = q_get_similarity_i(self, searcher);
1486
+ BQ(self)->similarity = ALLOC(Similarity);
1487
+ memcpy(BQ(self)->similarity, sim, sizeof(Similarity)); /* shallow struct copy of what q_get_similarity_i returned */
1488
+ BQ(self)->similarity->coord = &bq_coord_disabled; /* override coord on the copy only */
1489
+ BQ(self)->similarity->destroy = (void (*)(Similarity *))&free; /* the copy is released with free(); bq_destroy calls this */
1490
+ }
1342
1491
 
1343
- free(coord->coord_factors);
1344
- free(coord);
1492
+ return BQ(self)->similarity;
1493
+ }
1345
1494
 
1346
- if (bsc->counting_sum_scorer) {
1347
- bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1348
- } else {
1495
+ static ulong bq_hash(Query *self) /* Hashes a boolean query from the bc_hash of each clause plus the coord_disabled flag. */
1496
+ {
1349
1497
  int i;
1350
- for (i = 0; i < bsc->rs_cnt; i++) {
1351
- bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1498
+ ulong hash = 0;
1499
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1500
+ hash ^= bc_hash(BQ(self)->clauses[i]); /* XOR-combined, so clause order does not affect the result */
1352
1501
  }
1353
-
1354
- for (i = 0; i < bsc->os_cnt; i++) {
1355
- bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1502
+ return (hash << 1) | BQ(self)->coord_disabled; /* shift makes room for coord_disabled in the low bit (bq_new stores a bool there) */
1503
+ }
1504
+
1505
+ static int bq_eq(Query *self, Query *o) /* Returns true when both queries agree on coord_disabled, max_clause_cnt and clause_cnt, and bc_eq holds for the clauses at every index. */
1506
+ {
1507
+ int i;
1508
+ BooleanQuery *bq1 = BQ(self);
1509
+ BooleanQuery *bq2 = BQ(o);
1510
+ if ((bq1->coord_disabled != bq2->coord_disabled)
1511
+ || (bq1->max_clause_cnt != bq2->max_clause_cnt) /* compare against o; comparing bq1 with itself was always false */
1512
+ || (bq1->clause_cnt != bq2->clause_cnt)) {
1513
+ return false;
1356
1514
  }
1357
1515
 
1358
- for (i = 0; i < bsc->ps_cnt; i++) {
1359
- bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1516
+ for (i = 0; i < bq1->clause_cnt; i++) { /* counts are equal here, so one bound covers both arrays */
1517
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) { /* position-wise: clause order matters for equality */
1518
+ return false;
1519
+ }
1360
1520
  }
1361
- }
1362
- free(bsc->required_scorers);
1363
- free(bsc->optional_scorers);
1364
- free(bsc->prohibited_scorers);
1365
- scorer_destroy_i(self);
1521
+ return true;
1366
1522
  }
1367
1523
 
1368
- Explanation *bsc_explain(Scorer *self, int doc_num)
1524
+ Query *bq_new(bool coord_disabled) /* Creates an empty BooleanQuery and installs its Query callbacks; coord_disabled selects the custom similarity lookup. */
1369
1525
  {
1370
- return expl_create(0.0, estrdup("This explanation is not supported"));
1526
+ Query *self = q_new(BooleanQuery);
1527
+ BQ(self)->coord_disabled = coord_disabled;
1528
+ if (coord_disabled) { /* get_similarity is overridden only in this case */
1529
+ self->get_similarity = &bq_get_similarity;
1530
+ }
1531
+ BQ(self)->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
1532
+ BQ(self)->clause_cnt = 0;
1533
+ BQ(self)->clause_capa = BOOLEAN_CLAUSES_START_CAPA; /* bq_add_clause_nr doubles this when the array is full */
1534
+ BQ(self)->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
1535
+ BQ(self)->similarity = NULL; /* set on demand by bq_get_similarity */
1536
+ BQ(self)->original_boost = 0.0;
1537
+
1538
+ self->type = BOOLEAN_QUERY;
1539
+ self->rewrite = &bq_rewrite;
1540
+ self->extract_terms = &bq_extract_terms;
1541
+ self->to_s = &bq_to_s;
1542
+ self->hash = &bq_hash;
1543
+ self->eq = &bq_eq;
1544
+ self->destroy_i = &bq_destroy;
1545
+ self->create_weight_i = &bw_new;
1546
+ self->get_matchv_i = &bq_get_matchv_i;
1547
+
1548
+ return self;
1549
+ }
1550
+
1551
+ BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc) /* Appends bc to the query without calling REF on it; raises STATE_ERROR when the clause limit is exceeded. Returns bc. */
1552
+ {
1553
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) { /* array full: double the capacity before storing */
1554
+ BQ(self)->clause_capa *= 2;
1555
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1556
+ }
1557
+ if (BQ(self)->clause_cnt > BQ(self)->max_clause_cnt) { /* NOTE(review): '>' lets the query reach max_clause_cnt + 1 clauses before raising - confirm intended */
1558
+ RAISE(STATE_ERROR, "Too many clauses. The max clause limit is set to "
1559
+ "<%d> but your query has <%d> clauses. You can try increasing "
1560
+ ":max_clause_count for the BooleanQuery or using a different "
1561
+ "type of query.", BQ(self)->max_clause_cnt, BQ(self)->clause_cnt); /* argument order now matches the message: limit first, actual count second */
1562
+ }
1563
+ BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1564
+ BQ(self)->clause_cnt++;
1565
+ return bc;
1371
1566
  }
1372
1567
 
1373
- Scorer *bsc_create(Similarity *similarity)
1568
+ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc) /* Applies REF to bc, then appends it through bq_add_clause_nr and returns that call's result. */
1374
1569
  {
1375
- Scorer *self = scorer_create(similarity);
1376
- BooleanScorer *bsc = ALLOC(BooleanScorer);
1377
- ZEROSET(bsc, BooleanScorer, 1);
1378
- bsc->coordinator = coord_create(similarity);
1379
- bsc->counting_sum_scorer = NULL;
1380
- self->data = bsc;
1570
+ REF(bc); /* presumably bumps bc's reference count so the caller keeps its own reference - confirm against the REF macro */
1571
+ return bq_add_clause_nr(self, bc);
1572
+ }
1381
1573
 
1382
- self->score = &bsc_score;
1383
- self->next = &bsc_next;
1384
- self->skip_to = &bsc_skip_to;
1385
- self->explain = &bsc_explain;
1386
- self->destroy = &bsc_destroy;
1387
- return self;
1574
+ BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur) /* Wraps sub_query in a new clause via bc_new and appends it; sub_query itself is not passed to REF here. Returns the new clause. */
1575
+ {
1576
+ BooleanClause *bc = bc_new(sub_query, occur);
1577
+ bq_add_clause(self, bc); /* bq_add_clause applies REF to bc before storing it */
1578
+ bc_deref(bc); /* bc would have been referenced unnecessarily */
1579
+ return bc;
1388
1580
  }
1389
1581
 
1582
+ BooleanClause *bq_add_query(Query *self, Query *sub_query, enum BC_TYPE occur) /* Applies REF to sub_query, then adds it through bq_add_query_nr and returns the resulting clause. */
1583
+ {
1584
+ REF(sub_query); /* presumably lets the caller keep its own reference to sub_query - confirm against the REF macro */
1585
+ return bq_add_query_nr(self, sub_query, occur);
1586
+ }
1390
1587