ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_boolean.c CHANGED
@@ -1,1390 +1,1587 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
+ #include "array.h"
3
4
 
4
- static char * const INVALID_BC_ERROR_MSG = "Invalid value for BooleanClause Type";
5
- static char * const TOO_MANY_CLAUSES_ERROR_MSG = "Too many clauses";
6
- static char * const MIN_NUM_MATCHES_ERROR_MSG = "Minimum nr of matches must be positive";
7
- static char * const TWO_SUB_ERROR_MSG = "There must be at least 2 sub_scorers";
8
- static char * const UNKNOWN_OCCUR_VAL_ERROR_MSG = "Unknown value for occur";
5
+ #define BQ(query) ((BooleanQuery *)(query))
6
+ #define BW(weight) ((BooleanWeight *)(weight))
9
7
 
10
8
  /***************************************************************************
11
9
  *
12
- * BooleanWeight
10
+ * BooleanScorer
13
11
  *
14
12
  ***************************************************************************/
15
13
 
16
- float bw_sum_of_squared_weights(Weight *self)
14
+ /***************************************************************************
15
+ * Coordinator
16
+ ***************************************************************************/
17
+
18
+ typedef struct Coordinator
17
19
  {
18
- BooleanWeight *bw = (BooleanWeight *)self->data;
19
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
20
- Weight *weight;
20
+ int max_coord;
21
+ float *coord_factors;
22
+ Similarity *similarity;
23
+ int num_matches;
24
+ } Coordinator;
21
25
 
22
- float sum = 0.0;
23
- int i;
26
+ static Coordinator *coord_new(Similarity *similarity)
27
+ {
28
+ Coordinator *self = ALLOC_AND_ZERO(Coordinator);
29
+ self->similarity = similarity;
30
+ return self;
31
+ }
24
32
 
25
- for (i = 0; i < bw->w_cnt; i++) {
26
- if (! bq->clauses[i]->is_prohibited) {
27
- weight = bw->weights[i];
28
- sum += weight->sum_of_squared_weights(weight); // sum sub-weights
29
- }
30
- }
33
+ static Coordinator *coord_init(Coordinator *self)
34
+ {
35
+ int i;
36
+ self->coord_factors = ALLOC_N(float, self->max_coord + 1);
31
37
 
32
- sum *= self->value * self->value; // boost each sub-weight
38
+ for (i = 0; i <= self->max_coord; i++) {
39
+ self->coord_factors[i]
40
+ = sim_coord(self->similarity, i, self->max_coord);
41
+ }
33
42
 
34
- return sum;
43
+ return self;
35
44
  }
36
45
 
37
- void bw_normalize(Weight *self, float normalization_factor)
46
+ /***************************************************************************
47
+ * DisjunctionSumScorer
48
+ ***************************************************************************/
49
+
50
+ #define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
51
+
52
+ typedef struct DisjunctionSumScorer
38
53
  {
39
- BooleanWeight *bw = (BooleanWeight *)self->data;
40
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
41
- Weight *weight;
42
- int i;
43
- normalization_factor *= self->value; /* multiply by query boost */
54
+ Scorer super;
55
+ float cum_score;
56
+ int num_matches;
57
+ int min_num_matches;
58
+ Scorer **sub_scorers;
59
+ int ss_cnt;
60
+ PriorityQueue *scorer_queue;
61
+ Coordinator *coordinator;
62
+ } DisjunctionSumScorer;
44
63
 
45
- for (i = 0; i < bw->w_cnt; i++) {
46
- if (! bq->clauses[i]->is_prohibited) {
47
- weight = bw->weights[i];
48
- weight->normalize(weight, normalization_factor); // sum sub-weights
49
- }
50
- }
64
+ static float dssc_score(Scorer *self)
65
+ {
66
+ return DSSc(self)->cum_score;
51
67
  }
52
68
 
53
- Scorer *bw_scorer(Weight *self, IndexReader *ir)
69
+ static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
54
70
  {
55
- Scorer *sub_scorer, *bsc = bsc_create(self->similarity);
56
- BooleanWeight *bw = (BooleanWeight *)self->data;
57
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
58
- BooleanClause *clause;
59
- Weight *weight;
60
- int i;
61
-
62
- for (i = 0; i < bw->w_cnt; i++) {
63
- clause = bq->clauses[i];
64
- weight = bw->weights[i];
65
- sub_scorer = weight->scorer(weight, ir);
66
- if (sub_scorer) {
67
- bsc_add_scorer(bsc, sub_scorer, clause->occur);
68
- } else if (clause->is_required) {
69
- bsc->destroy(bsc);
70
- return NULL;
71
+ int i;
72
+ Scorer *sub_scorer;
73
+ PriorityQueue *pq = dssc->scorer_queue
74
+ = pq_new(dssc->ss_cnt, (lt_ft)&scorer_doc_less_than, NULL);
75
+
76
+ for (i = 0; i < dssc->ss_cnt; i++) {
77
+ sub_scorer = dssc->sub_scorers[i];
78
+ if (sub_scorer->next(sub_scorer)) {
79
+ pq_insert(pq, sub_scorer);
80
+ }
71
81
  }
72
- }
82
+ }
83
+
84
+ static bool dssc_advance_after_current(Scorer *self)
85
+ {
86
+ DisjunctionSumScorer *dssc = DSSc(self);
87
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
88
+
89
+ /* repeat until minimum number of matches is found */
90
+ while (true) {
91
+ Scorer *top = (Scorer *)pq_top(scorer_queue);
92
+ self->doc = top->doc;
93
+ dssc->cum_score = top->score(top);
94
+ dssc->num_matches = 1;
95
+ /* Until all sub-scorers are after self->doc */
96
+ while (true) {
97
+ if (top->next(top)) {
98
+ pq_down(scorer_queue);
99
+ }
100
+ else {
101
+ pq_pop(scorer_queue);
102
+ if (scorer_queue->size
103
+ < (dssc->min_num_matches - dssc->num_matches)) {
104
+ /* Not enough subscorers left for a match on this
105
+ * document, also no more chance of any further match */
106
+ return false;
107
+ }
108
+ if (scorer_queue->size == 0) {
109
+ /* nothing more to advance, check for last match. */
110
+ break;
111
+ }
112
+ }
113
+ top = pq_top(scorer_queue);
114
+ if (top->doc != self->doc) {
115
+ /* All remaining subscorers are after self->doc */
116
+ break;
117
+ }
118
+ else {
119
+ dssc->cum_score += top->score(top);
120
+ dssc->num_matches++;
121
+ }
122
+ }
73
123
 
74
- return bsc;
124
+ if (dssc->num_matches >= dssc->min_num_matches) {
125
+ return true;
126
+ }
127
+ else if (scorer_queue->size < dssc->min_num_matches) {
128
+ return false;
129
+ }
130
+ }
75
131
  }
76
132
 
77
- char *bw_to_s(Weight *self)
133
+ static bool dssc_next(Scorer *self)
78
134
  {
79
- return strfmt("BooleanWeight(%f)", self->value);
135
+ if (DSSc(self)->scorer_queue == NULL) {
136
+ dssc_init_scorer_queue(DSSc(self));
137
+ }
138
+
139
+ if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
140
+ return false;
141
+ }
142
+ else {
143
+ return dssc_advance_after_current(self);
144
+ }
80
145
  }
81
146
 
82
- void bw_destroy(Weight *self)
147
+ static bool dssc_skip_to(Scorer *self, int doc_num)
83
148
  {
84
- int i;
85
- BooleanWeight *bw = (BooleanWeight *)self->data;
149
+ DisjunctionSumScorer *dssc = DSSc(self);
150
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
86
151
 
87
- for (i = 0; i < bw->w_cnt; i++) {
88
- bw->weights[i]->destroy(bw->weights[i]);
89
- }
152
+ if (scorer_queue == NULL) {
153
+ dssc_init_scorer_queue(dssc);
154
+ scorer_queue = dssc->scorer_queue;
155
+ }
90
156
 
91
- free(bw->weights);
92
- free(bw);
93
- w_destroy(self);
157
+ if (scorer_queue->size < dssc->min_num_matches) {
158
+ return false;
159
+ }
160
+ if (doc_num <= self->doc) {
161
+ doc_num = self->doc + 1;
162
+ }
163
+ while (true) {
164
+ Scorer *top = pq_top(scorer_queue);
165
+ if (top->doc >= doc_num) {
166
+ return dssc_advance_after_current(self);
167
+ }
168
+ else if (top->skip_to(top, doc_num)) {
169
+ pq_down(scorer_queue);
170
+ }
171
+ else {
172
+ pq_pop(scorer_queue);
173
+ if (scorer_queue->size < dssc->min_num_matches) {
174
+ return false;
175
+ }
176
+ }
177
+ }
94
178
  }
95
179
 
96
- Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
180
+ static Explanation *dssc_explain(Scorer *self, int doc_num)
97
181
  {
98
- BooleanWeight *bw = (BooleanWeight *)self->data;
99
- BooleanQuery *bq = (BooleanQuery *)self->query->data;
100
- Explanation *sum_expl = expl_create(0.0, estrdup("sum of:"));
101
- BooleanClause *clause;
102
- Weight *weight;
103
- Explanation *explanation;
104
- int coord = 0;
105
- int max_coord = 0;
106
- float coord_factor = 0.0;
107
- float sum = 0.0;
108
- int i;
182
+ int i;
183
+ DisjunctionSumScorer *dssc = DSSc(self);
184
+ Scorer *sub_scorer;
185
+ Explanation *e
186
+ = expl_new(0.0, "At least %d of:", dssc->min_num_matches);
187
+ for (i = 0; i < dssc->ss_cnt; i++) {
188
+ sub_scorer = dssc->sub_scorers[i];
189
+ expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
190
+ }
191
+ return e;
192
+ }
109
193
 
110
- for (i = 0; i < bw->w_cnt; i++) {
111
- weight = bw->weights[i];
112
- clause = bq->clauses[i];
113
- explanation = weight->explain(weight, ir, doc_num);
114
- if (!clause->is_prohibited) max_coord++;
115
- if (explanation->value > 0.0) {
116
- if (!clause->is_prohibited) {
117
- expl_add_detail(sum_expl, explanation);
118
- sum += explanation->value;
119
- coord++;
120
- } else {
121
- expl_destoy(explanation);
122
- expl_destoy(sum_expl);
123
- return expl_create(0.0, estrdup("match prohibited"));
124
- }
125
- } else if (clause->is_required) {
126
- expl_destoy(explanation);
127
- expl_destoy(sum_expl);
128
- return expl_create(0.0, estrdup("match required"));
129
- } else {
130
- expl_destoy(explanation);
194
+ static void dssc_destroy(Scorer *self)
195
+ {
196
+ DisjunctionSumScorer *dssc = DSSc(self);
197
+ int i;
198
+ for (i = 0; i < dssc->ss_cnt; i++) {
199
+ dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
131
200
  }
132
- }
133
- sum_expl->value = sum;
134
-
135
- if (coord == 1) { /* only one clause matched */
136
- explanation = sum_expl; /* eliminate wrapper */
137
- sum_expl->dcnt = 0;
138
- sum_expl = sum_expl->details[0];
139
- expl_destoy(explanation);
140
- }
141
-
142
- coord_factor = sim_coord(self->similarity, coord, max_coord);
143
-
144
- if (coord_factor == 1.0) { /* coord is no-op */
145
- return sum_expl; /* eliminate wrapper */
146
- } else {
147
- explanation = expl_create(sum * coord_factor, estrdup("product of:"));
148
- expl_add_detail(explanation, sum_expl);
149
- expl_add_detail(explanation, expl_create(coord_factor,
150
- strfmt("coord(%d/%d)", coord, max_coord)));
151
- return explanation;
152
- }
153
- }
154
-
155
- Weight *bw_create(Query *query, Searcher *searcher)
156
- {
157
- int i;
158
- Weight *self = w_create(query);
159
- BooleanWeight *bw = ALLOC(BooleanWeight);
160
- BooleanQuery *bq = (BooleanQuery *)query->data;
161
-
162
- bw->w_cnt = bq->clause_cnt;
163
- bw->weights = ALLOC_N(Weight *, bw->w_cnt);
164
- for (i = 0; i < bw->w_cnt; i++) {
165
- bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
166
- }
167
- self->data = bw;
168
-
169
- self->normalize = &bw_normalize;
170
- self->scorer = &bw_scorer;
171
- self->explain = &bw_explain;
172
- self->to_s = &bw_to_s;
173
- self->destroy = &bw_destroy;
174
- self->sum_of_squared_weights = &bw_sum_of_squared_weights;
175
-
176
- self->similarity = query->get_similarity(query, searcher);
177
- self->value = query->boost;
178
-
179
- return self;
201
+ if (dssc->scorer_queue) {
202
+ pq_destroy(dssc->scorer_queue);
203
+ }
204
+ scorer_destroy_i(self);
180
205
  }
181
206
 
182
- /***************************************************************************
183
- *
184
- * BooleanClause
185
- *
186
- ***************************************************************************/
207
+ static Scorer *disjunction_sum_scorer_new(Scorer **sub_scorers, int ss_cnt,
208
+ int min_num_matches)
209
+ {
210
+ Scorer *self = scorer_new(DisjunctionSumScorer, NULL);
211
+ DSSc(self)->ss_cnt = ss_cnt;
212
+
213
+ /* The document number of the current match */
214
+ self->doc = -1;
215
+ DSSc(self)->cum_score = -1.0;
216
+
217
+ /* The number of subscorers that provide the current match. */
218
+ DSSc(self)->num_matches = -1;
219
+ DSSc(self)->coordinator = NULL;
187
220
 
188
- void bc_set_occur(BooleanClause *self, unsigned int occur)
189
- {
190
- self->occur = occur;
191
- switch (occur) {
192
- case BC_SHOULD:
193
- self->is_prohibited = false;
194
- self->is_required = false;
195
- break;
196
- case BC_MUST:
197
- self->is_prohibited = false;
198
- self->is_required = true;
199
- break;
200
- case BC_MUST_NOT:
201
- self->is_prohibited = true;
202
- self->is_required = false;
203
- break;
204
- default:
205
- RAISE(ARG_ERROR, INVALID_BC_ERROR_MSG);
206
- }
221
+ #ifdef DEBUG
222
+ if (min_num_matches <= 0) {
223
+ RAISE(ARG_ERROR, "The min_num_matches value <%d> should not be less "
224
+ "than 0\n", min_num_matches);
225
+ }
226
+ if (ss_cnt <= 1) {
227
+ RAISE(ARG_ERROR, "There should be at least 2 sub_scorers in a "
228
+ "DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
229
+ }
230
+ #endif
231
+
232
+ DSSc(self)->min_num_matches = min_num_matches;
233
+ DSSc(self)->sub_scorers = sub_scorers;
234
+ DSSc(self)->scorer_queue = NULL;
235
+
236
+ self->score = &dssc_score;
237
+ self->next = &dssc_next;
238
+ self->skip_to = &dssc_skip_to;
239
+ self->explain = &dssc_explain;
240
+ self->destroy = &dssc_destroy;
241
+
242
+ return self;
207
243
  }
208
244
 
209
- void bc_deref(BooleanClause *self)
245
+ static float cdssc_score(Scorer *self)
210
246
  {
211
- if (--self->ref_cnt <= 0) {
212
- q_deref(self->query);
213
- free(self);
214
- }
247
+ DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
248
+ return DSSc(self)->cum_score;
215
249
  }
216
250
 
217
- uint bc_hash(BooleanClause *self)
251
+ static Scorer *counting_disjunction_sum_scorer_new(
252
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt,
253
+ int min_num_matches)
218
254
  {
219
- return ((q_hash(self->query) << 2) | self->occur);
255
+ Scorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
256
+ min_num_matches);
257
+ DSSc(self)->coordinator = coordinator;
258
+ self->score = &cdssc_score;
259
+ return self;
220
260
  }
221
261
 
222
- int bc_eq(BooleanClause *self, BooleanClause *o)
262
+ /***************************************************************************
263
+ * ConjunctionScorer
264
+ ***************************************************************************/
265
+
266
+ #define CSc(scorer) ((ConjunctionScorer *)(scorer))
267
+
268
+ typedef struct ConjunctionScorer
223
269
  {
224
- return ((self->occur == o->occur) && q_eq(self->query, o->query));
225
- }
270
+ Scorer super;
271
+ bool first_time : 1;
272
+ bool more : 1;
273
+ float coord;
274
+ Scorer **sub_scorers;
275
+ int ss_cnt;
276
+ int first_idx;
277
+ Coordinator *coordinator;
278
+ int last_scored_doc;
279
+ } ConjunctionScorer;
226
280
 
227
- BooleanClause *bc_create(Query *query, unsigned int occur)
281
+ static void csc_sort_scorers(ConjunctionScorer *csc)
228
282
  {
229
- BooleanClause *self = ALLOC(BooleanClause);
230
- self->ref_cnt = 1;
231
- self->query = query;
232
- bc_set_occur(self, occur);
233
- return self;
283
+ qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
284
+ csc->first_idx = 0;
234
285
  }
235
286
 
236
- /***************************************************************************
237
- *
238
- * BooleanQuery
239
- *
240
- ***************************************************************************/
287
+ static void csc_init(Scorer *self, bool init_scorers)
288
+ {
289
+ ConjunctionScorer *csc = CSc(self);
290
+ const int sub_sc_cnt = csc->ss_cnt;
241
291
 
242
- Query *bq_rewrite(Query *self, IndexReader *ir)
243
- {
244
- BooleanQuery *bq = (BooleanQuery *)self->data;
245
- BooleanClause *clause;
246
- Query *q, *rq;
247
- int i;
248
-
249
- if (bq->clause_cnt == 1) { // optimize 1-clause queries
250
- clause = bq->clauses[0];
251
- if (! clause->is_prohibited) { // just return clause
252
- q = clause->query->rewrite(clause->query, ir); // rewrite first
253
-
254
- if (self->boost != 1.0) {// incorporate boost
255
- /* original_boost is initialized to 0.0. If it has been set to
256
- * something else it means this query has already been boosted before
257
- * so boost from the original value */
258
- if ((q == clause->query) && bq->original_boost) {
259
- /* rewrite was no-op */
260
- q->boost = bq->original_boost * self->boost;
261
- } else {
262
- bq->original_boost = q->boost; /* save original boost */
263
- q->boost *= self->boost;
292
+ /* compute coord factor */
293
+ csc->coord = sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
294
+
295
+ csc->more = (sub_sc_cnt > 0);
296
+
297
+ if (init_scorers) {
298
+ int i;
299
+ /* move each scorer to its first entry */
300
+ for (i = 0; i < sub_sc_cnt; i++) {
301
+ Scorer *sub_scorer = csc->sub_scorers[i];
302
+ if (!csc->more) {
303
+ break;
304
+ }
305
+ csc->more = sub_scorer->next(sub_scorer);
264
306
  }
265
- }
266
-
267
- return q;
268
- }
269
- }
270
-
271
- /* replace each clause's query with its rewritten query */
272
- for (i = 0; i < bq->clause_cnt; i++) {
273
- clause = bq->clauses[i];
274
- rq = clause->query->rewrite(clause->query, ir);
275
- q_deref(clause->query);
276
- clause->query = rq;
277
- }
278
-
279
- self->ref_cnt++;
280
- return self;
281
- }
282
-
283
- void bq_extract_terms(Query *self, HashSet *terms)
284
- {
285
- BooleanQuery *bq = (BooleanQuery *)self->data;
286
- BooleanClause *clause;
287
- int i;
288
- for (i = 0; i < bq->clause_cnt; i++) {
289
- clause = bq->clauses[i];
290
- clause->query->extract_terms(clause->query, terms);
291
- }
292
- }
293
-
294
- char *bq_to_s(Query *self, char *field)
295
- {
296
- int i;
297
- BooleanQuery *bq = (BooleanQuery *)self->data;
298
- BooleanClause *clause;
299
- Query *sub_query;
300
- char *buffer;
301
- char *clause_str;
302
- int bp = 0;
303
- int size = QUERY_STRING_START_SIZE;
304
- int needed;
305
- int clause_len;
306
-
307
- buffer = ALLOC_N(char, size);
308
- if (self->boost != 1.0) {
309
- buffer[0] = '(';
310
- bp++;
311
- }
312
-
313
- for (i = 0; i < bq->clause_cnt; i++) {
314
- clause = bq->clauses[i];
315
- clause_str = clause->query->to_s(clause->query, field);
316
- clause_len = (int)strlen(clause_str);
317
- needed = clause_len + 5;
318
- while ((size - bp) < needed) {
319
- size *= 2;
320
- REALLOC_N(buffer, char, size);
321
- }
322
-
323
- if (i > 0) {
324
- buffer[bp++] = ' ';
325
- }
326
- if (clause->is_prohibited) {
327
- buffer[bp++] = '-';
328
- } else if (clause->is_required) {
329
- buffer[bp++] = '+';
330
- }
331
-
332
- sub_query = clause->query;
333
- if (sub_query->type == BOOLEAN_QUERY) { // wrap sub-bools in parens
334
- buffer[bp++] = '(';
335
- memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
336
- bp += clause_len;
337
- buffer[bp++] = ')';
338
- } else {
339
- memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
340
- bp += clause_len;
341
- }
342
- free(clause_str);
343
- }
344
-
345
- if (self->boost != 1.0) {
346
- char *boost_str = strfmt(")^%f", self->boost);
347
- int boost_len = (int)strlen(boost_str);
348
- REALLOC_N(buffer, char, bp + boost_len + 1);
349
- memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
350
- bp += boost_len;
351
- free(boost_str);
352
- }
353
- buffer[bp] = 0;
354
- return buffer;
307
+ if (csc->more) {
308
+ csc_sort_scorers(csc);
309
+ }
310
+ }
311
+
312
+ csc->first_time = false;
355
313
  }
356
314
 
357
- static void bq_destroy(Query *self)
315
+ static float csc_score(Scorer *self)
358
316
  {
359
- BooleanQuery *bq = (BooleanQuery *)self->data;
360
- int i;
361
- for (i = 0; i < bq->clause_cnt; i++) {
362
- bc_deref(bq->clauses[i]);
363
- }
364
- free(bq->clauses);
365
- if (bq->similarity) {
366
- bq->similarity->destroy(bq->similarity);
367
- }
368
- free(bq);
369
- q_destroy_i(self);
317
+ ConjunctionScorer *csc = CSc(self);
318
+ const int sub_sc_cnt = csc->ss_cnt;
319
+ float score = 0.0; /* sum scores */
320
+ int i;
321
+ for (i = 0; i < sub_sc_cnt; i++) {
322
+ Scorer *sub_scorer = csc->sub_scorers[i];
323
+ score += sub_scorer->score(sub_scorer);
324
+ }
325
+ score *= csc->coord;
326
+ return score;
327
+ }
328
+
329
+ static bool csc_do_next(Scorer *self)
330
+ {
331
+ ConjunctionScorer *csc = CSc(self);
332
+ const int sub_sc_cnt = csc->ss_cnt;
333
+ int first_idx = csc->first_idx;
334
+ Scorer *first_sc = csc->sub_scorers[first_idx];
335
+ Scorer *last_sc = csc->sub_scorers[PREV_NUM(first_idx, sub_sc_cnt)];
336
+
337
+ /* skip to doc with all clauses */
338
+ while (csc->more && (first_sc->doc < last_sc->doc)) {
339
+ /* skip first upto last */
340
+ csc->more = first_sc->skip_to(first_sc, last_sc->doc);
341
+ /* move first to last */
342
+ last_sc = first_sc;
343
+ first_idx = NEXT_NUM(first_idx, sub_sc_cnt);
344
+ first_sc = csc->sub_scorers[first_idx];
345
+ }
346
+ self->doc = first_sc->doc;
347
+ csc->first_idx = first_idx;
348
+ return csc->more;
370
349
  }
371
350
 
372
- float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
351
+ static bool csc_next(Scorer *self)
373
352
  {
374
- return 1.0;
353
+ ConjunctionScorer *csc = CSc(self);
354
+ if (csc->first_time) {
355
+ csc_init(self, true);
356
+ }
357
+ else if (csc->more) {
358
+ /* trigger further scanning */
359
+ const int last_idx = PREV_NUM(csc->first_idx, csc->ss_cnt);
360
+ Scorer *sub_scorer = csc->sub_scorers[last_idx];
361
+ csc->more = sub_scorer->next(sub_scorer);
362
+ }
363
+ return csc_do_next(self);
375
364
  }
376
365
 
377
- Similarity *bq_get_similarity(Query *self, Searcher *searcher)
366
+ static bool csc_skip_to(Scorer *self, int doc_num)
378
367
  {
379
- BooleanQuery *bq = (BooleanQuery *)self->data;
380
- if (!bq->similarity) {
381
- Similarity *sim = q_get_similarity_i(self, searcher);
382
- bq->similarity = ALLOC(Similarity);
383
- memcpy(bq->similarity, sim, sizeof(Similarity));
384
- bq->similarity->coord = &bq_coord_disabled;
385
- bq->similarity->destroy = (void (*)(Similarity *))&free;
386
- }
368
+ ConjunctionScorer *csc = CSc(self);
369
+ const int sub_sc_cnt = csc->ss_cnt;
370
+ int i;
371
+ bool more = csc->more;
387
372
 
388
- return bq->similarity;
389
- }
373
+ if (csc->first_time) {
374
+ csc_init(self, true);
375
+ }
390
376
 
391
- static uint bq_hash(Query *self)
392
- {
393
- int i;
394
- uint hash = 0;
395
- BooleanQuery *bq = (BooleanQuery *)self->data;
396
- for (i = 0; i < bq->clause_cnt; i++) {
397
- hash ^= bc_hash(bq->clauses[i]);
398
- }
399
- return (hash << 1) | bq->coord_disabled;
377
+ for (i = 0; i < sub_sc_cnt; i++) {
378
+ if (!more) {
379
+ break;
380
+ }
381
+ else {
382
+ Scorer *sub_scorer = csc->sub_scorers[i];
383
+ more = sub_scorer->skip_to(sub_scorer, doc_num);
384
+ }
385
+ }
386
+ if (more) {
387
+ /* resort the scorers */
388
+ csc_sort_scorers(csc);
389
+ }
390
+
391
+ more = csc->more;
392
+ return csc_do_next(self);
400
393
  }
401
394
 
402
- static int bq_eq(Query *self, Query *o)
395
+ static void csc_destroy(Scorer *self)
403
396
  {
404
- int i;
405
- BooleanQuery *bq1 = (BooleanQuery *)self->data;
406
- BooleanQuery *bq2 = (BooleanQuery *)o->data;
407
- if ((bq1->coord_disabled != bq2->coord_disabled) ||
408
- (bq1->max_clause_cnt != bq1->max_clause_cnt) ||
409
- (bq1->clause_cnt != bq2->clause_cnt)) {
410
- return false;
411
- }
412
-
413
- for (i = 0; i < bq1->clause_cnt; i++) {
414
- if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
415
- return false;
416
- }
417
- }
418
- return true;
419
- }
420
-
421
- Query *bq_create(bool coord_disabled)
422
- {
423
- Query *self = q_create();
424
- BooleanQuery *bq = ALLOC(BooleanQuery);
425
- bq->coord_disabled = coord_disabled;
426
- if (coord_disabled) {
427
- self->get_similarity = &bq_get_similarity;
428
- }
429
- bq->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
430
- bq->clause_cnt = 0;
431
- bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
432
- bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
433
- bq->similarity = NULL;
434
- bq->original_boost = 0.0;
435
- self->data = bq;
436
-
437
- self->type = BOOLEAN_QUERY;
438
- self->rewrite = &bq_rewrite;
439
- self->extract_terms = &bq_extract_terms;
440
- self->to_s = &bq_to_s;
441
- self->hash = &bq_hash;
442
- self->eq = &bq_eq;
443
- self->destroy_i = &bq_destroy;
444
- self->create_weight_i = &bw_create;
445
- return self;
397
+ ConjunctionScorer *csc = CSc(self);
398
+ const int sub_sc_cnt = csc->ss_cnt;
399
+ int i;
400
+ for (i = 0; i < sub_sc_cnt; i++) {
401
+ csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
402
+ }
403
+ free(csc->sub_scorers);
404
+ scorer_destroy_i(self);
446
405
  }
447
406
 
448
- BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
407
+ static Scorer *conjunction_scorer_new(Similarity *similarity)
449
408
  {
450
- BooleanQuery *bq = (BooleanQuery *)self->data;
451
- if (!self->destroy_all) ref(bc);
452
- if (bq->clause_cnt >= bq->clause_capa) {
453
- bq->clause_capa *= 2;
454
- REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
455
- }
456
- if (bq->clause_cnt > bq->max_clause_cnt) {
457
- RAISE(STATE_ERROR, TOO_MANY_CLAUSES_ERROR_MSG);
458
- }
459
- bq->clauses[bq->clause_cnt] = bc;
460
- bq->clause_cnt++;
461
- return bc;
462
- }
463
-
464
- BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
465
- {
466
- BooleanClause *bc = bc_create(sub_query, occur);
467
- bq_add_clause(self, bc);
468
- if (!self->destroy_all) {
469
- ref(sub_query);
470
- bc_deref(bc); /* bc would have been referenced unnecessarily */
471
- }
472
- return bc;
473
- }
409
+ Scorer *self = scorer_new(ConjunctionScorer, similarity);
474
410
 
475
- /***************************************************************************
476
- *
477
- * BooleanScorer
478
- *
479
- ***************************************************************************/
411
+ CSc(self)->first_time = true;
412
+ CSc(self)->more = true;
413
+ CSc(self)->coordinator = NULL;
480
414
 
481
- /***************************************************************************
482
- * Coordinator
483
- ***************************************************************************/
415
+ self->score = &csc_score;
416
+ self->next = &csc_next;
417
+ self->skip_to = &csc_skip_to;
418
+ self->destroy = &csc_destroy;
484
419
 
485
- Coordinator *coord_create(Similarity *similarity)
420
+ return self;
421
+ }
422
+
423
+ static float ccsc_score(Scorer *self)
486
424
  {
487
- Coordinator *self = ALLOC(Coordinator);
488
- ZEROSET(self, Coordinator, 1);
489
- self->similarity = similarity;
490
- return self;
425
+ ConjunctionScorer *csc = CSc(self);
426
+
427
+ int doc;
428
+ if ((doc = self->doc) > csc->last_scored_doc) {
429
+ csc->last_scored_doc = doc;
430
+ csc->coordinator->num_matches += csc->ss_cnt;
431
+ }
432
+
433
+ return csc_score(self);
491
434
  }
492
435
 
493
- Coordinator *coord_init(Coordinator *self)
436
+ static Scorer *counting_conjunction_sum_scorer_new(
437
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt)
494
438
  {
495
- int i;
496
- self->coord_factors = ALLOC_N(float, self->max_coord + 1);
439
+ Scorer *self = conjunction_scorer_new(sim_create_default());
440
+ ConjunctionScorer *csc = CSc(self);
441
+ csc->coordinator = coordinator;
442
+ csc->last_scored_doc = -1;
443
+ csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
444
+ memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
445
+ csc->ss_cnt = ss_cnt;
497
446
 
498
- for (i = 0; i <= self->max_coord; i++) {
499
- self->coord_factors[i] = sim_coord(self->similarity, i, self->max_coord);
500
- }
447
+ self->score = &ccsc_score;
501
448
 
502
- return self;
449
+ return self;
503
450
  }
504
451
 
505
452
  /***************************************************************************
506
- * DisjunctionSumScorer
453
+ * SingleMatchScorer
507
454
  ***************************************************************************/
508
455
 
509
- float dssc_score(Scorer *self)
456
+ #define SMSc(scorer) ((SingleMatchScorer *)(scorer))
457
+
458
+ typedef struct SingleMatchScorer
510
459
  {
511
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
512
- return dssc->cum_score;
513
- }
460
+ Scorer super;
461
+ Coordinator *coordinator;
462
+ Scorer *scorer;
463
+ } SingleMatchScorer;
464
+
514
465
 
515
- void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
466
+ static float smsc_score(Scorer *self)
516
467
  {
517
- int i;
518
- Scorer *sub_scorer;
519
- PriorityQueue *pq = dssc->scorer_queue =
520
- pq_create(dssc->ss_cnt, &scorer_doc_less_than);
468
+ SMSc(self)->coordinator->num_matches++;
469
+ return SMSc(self)->scorer->score(SMSc(self)->scorer);
470
+ }
521
471
 
522
- for (i = 0; i < dssc->ss_cnt; i++) {
523
- sub_scorer = dssc->sub_scorers[i];
524
- if (sub_scorer->next(sub_scorer)) {
525
- pq_insert(pq, sub_scorer);
472
+ static bool smsc_next(Scorer *self)
473
+ {
474
+ Scorer *scorer = SMSc(self)->scorer;
475
+ if (scorer->next(scorer)) {
476
+ self->doc = scorer->doc;
477
+ return true;
526
478
  }
527
- }
479
+ return false;
528
480
  }
529
481
 
530
- bool dssc_advance_after_current(Scorer *self)
482
+ static bool smsc_skip_to(Scorer *self, int doc_num)
531
483
  {
532
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
533
- PriorityQueue *scorer_queue = dssc->scorer_queue;
534
- Scorer *top;
535
- while (true) { // repeat until minimum nr of matches
536
- top = (Scorer *)pq_top(scorer_queue);
537
- self->doc = top->doc;
538
- dssc->cum_score = top->score(top);
539
- dssc->num_matches = 1;
540
- while (true) { // Until all subscorers are after self->hit.doc
541
- if (top->next(top)) {
542
- pq_down(scorer_queue);
543
- } else {
544
- pq_pop(scorer_queue);
545
- if (scorer_queue->count < (dssc->min_num_matches - dssc->num_matches)) {
546
- // Not enough subscorers left for a match on this document,
547
- // and also no more chance of any further match.
548
- return false;
549
- }
550
- if (scorer_queue->count == 0) {
551
- break; // nothing more to advance, check for last match.
552
- }
553
- }
554
- top = pq_top(scorer_queue);
555
- if (top->doc != self->doc) {
556
- break; // All remaining subscorers are after self->hit.doc.
557
- } else {
558
- dssc->cum_score += top->score(top);
559
- dssc->num_matches++;
560
- }
484
+ Scorer *scorer = SMSc(self)->scorer;
485
+ if (scorer->skip_to(scorer, doc_num)) {
486
+ self->doc = scorer->doc;
487
+ return true;
561
488
  }
489
+ return false;
490
+ }
562
491
 
563
- if (dssc->num_matches >= dssc->min_num_matches) {
564
- return true;
565
- } else if (scorer_queue->count < dssc->min_num_matches) {
566
- return false;
567
- }
568
- }
492
+ static Explanation *smsc_explain(Scorer *self, int doc_num)
493
+ {
494
+ Scorer *scorer = SMSc(self)->scorer;
495
+ return scorer->explain(scorer, doc_num);
569
496
  }
570
497
 
571
- bool dssc_next(Scorer *self)
498
+ static void smsc_destroy(Scorer *self)
572
499
  {
573
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
500
+ Scorer *scorer = SMSc(self)->scorer;
501
+ scorer->destroy(scorer);
502
+ scorer_destroy_i(self);
503
+ }
574
504
 
575
- if (dssc->scorer_queue == NULL) {
576
- dssc_init_scorer_queue(dssc);
577
- }
505
+ static Scorer *single_match_scorer_new(Coordinator *coordinator,
506
+ Scorer *scorer)
507
+ {
508
+ Scorer *self = scorer_new(SingleMatchScorer, scorer->similarity);
509
+ SMSc(self)->coordinator = coordinator;
510
+ SMSc(self)->scorer = scorer;
578
511
 
579
- if (dssc->scorer_queue->count < dssc->min_num_matches) {
580
- return false;
581
- } else {
582
- return dssc_advance_after_current(self);
583
- }
512
+ self->score = &smsc_score;
513
+ self->next = &smsc_next;
514
+ self->skip_to = &smsc_skip_to;
515
+ self->explain = &smsc_explain;
516
+ self->destroy = &smsc_destroy;
517
+ return self;
584
518
  }
585
519
 
586
- bool dssc_skip_to(Scorer *self, int doc_num)
520
+ /***************************************************************************
521
+ * ReqOptSumScorer
522
+ ***************************************************************************/
523
+
524
+ #define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
525
+
526
+ typedef struct ReqOptSumScorer
587
527
  {
588
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
589
- PriorityQueue *scorer_queue = dssc->scorer_queue;
590
- Scorer *top;
528
+ Scorer super;
529
+ Scorer *req_scorer;
530
+ Scorer *opt_scorer;
531
+ bool first_time_opt;
532
+ } ReqOptSumScorer;
591
533
 
592
- if (scorer_queue == NULL) {
593
- dssc_init_scorer_queue(dssc);
594
- scorer_queue = dssc->scorer_queue;
595
- }
534
+ static float rossc_score(Scorer *self)
535
+ {
536
+ ReqOptSumScorer *rossc = ROSSc(self);
537
+ Scorer *req_scorer = rossc->req_scorer;
538
+ Scorer *opt_scorer = rossc->opt_scorer;
539
+ int cur_doc = req_scorer->doc;
540
+ float req_score = req_scorer->score(req_scorer);
596
541
 
597
- if (scorer_queue->count < dssc->min_num_matches) {
598
- return false;
599
- }
600
- if (doc_num <= self->doc) {
601
- doc_num = self->doc + 1;
602
- }
603
- while (true) {
604
- top = pq_top(scorer_queue);
605
- if (top->doc >= doc_num) {
606
- return dssc_advance_after_current(self);
607
- } else if (top->skip_to(top, doc_num)) {
608
- pq_down(scorer_queue);
609
- } else {
610
- pq_pop(scorer_queue);
611
- if (scorer_queue->count < dssc->min_num_matches) {
612
- return false;
613
- }
542
+ if (rossc->first_time_opt) {
543
+ rossc->first_time_opt = false;
544
+ if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
545
+ SCORER_NULLIFY(rossc->opt_scorer);
546
+ return req_score;
547
+ }
548
+ }
549
+ else if (opt_scorer == NULL) {
550
+ return req_score;
614
551
  }
615
- }
552
+ else if ((opt_scorer->doc < cur_doc)
553
+ && ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
554
+ SCORER_NULLIFY(rossc->opt_scorer);
555
+ return req_score;
556
+ }
557
+ /* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
558
+ return (opt_scorer->doc == cur_doc)
559
+ ? req_score + opt_scorer->score(opt_scorer)
560
+ : req_score;
616
561
  }
617
562
 
618
- Explanation *dssc_explain(Scorer *self, int doc_num)
563
+ static bool rossc_next(Scorer *self)
619
564
  {
620
- int i;
621
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
622
- Scorer *sub_scorer;
623
- Explanation *e = expl_create(0.0,
624
- strfmt("At least %d of:", dssc->min_num_matches));
625
- for (i = 0; i < dssc->ss_cnt; i++) {
626
- sub_scorer = dssc->sub_scorers[i];
627
- expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
628
- }
629
- return e;
565
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
566
+ if (req_scorer->next(req_scorer)) {
567
+ self->doc = req_scorer->doc;
568
+ return true;
569
+ }
570
+ return false;
630
571
  }
631
572
 
632
- void dssc_destroy(Scorer *self)
573
+ static bool rossc_skip_to(Scorer *self, int doc_num)
633
574
  {
634
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
635
- int i;
636
- for (i = 0; i < dssc->ss_cnt; i++) {
637
- dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
638
- }
639
- if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
640
- scorer_destroy_i(self);
575
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
576
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
577
+ self->doc = req_scorer->doc;
578
+ return true;
579
+ }
580
+ return false;
641
581
  }
642
582
 
643
- Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
644
- int min_num_matches)
583
+ static Explanation *rossc_explain(Scorer *self, int doc_num)
645
584
  {
646
- Scorer *self = scorer_create(NULL);
647
- DisjunctionSumScorer *dssc = ALLOC(DisjunctionSumScorer);
648
- self->data = dssc;
649
- dssc->ss_cnt = ss_cnt;
650
-
651
- // The document number of the current match.
652
- self->doc = -1;
653
- dssc->cum_score = -1.0;
585
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
586
+ Scorer *opt_scorer = ROSSc(self)->opt_scorer;
654
587
 
655
- // The number of subscorers that provide the current match.
656
- dssc->num_matches = -1;
657
- dssc->coordinator = NULL;
658
-
659
- if (min_num_matches <= 0) {
660
- RAISE(ARG_ERROR, MIN_NUM_MATCHES_ERROR_MSG);
661
- }
662
- if (ss_cnt <= 1) {
663
- RAISE(ARG_ERROR, TWO_SUB_ERROR_MSG);
664
- }
665
-
666
- dssc->min_num_matches = min_num_matches;
667
- dssc->sub_scorers = sub_scorers;
668
-
669
- dssc->scorer_queue = NULL;
670
-
671
- self->score = &dssc_score;
672
- self->next = &dssc_next;
673
- self->skip_to = &dssc_skip_to;
674
- self->explain = &dssc_explain;
675
- self->destroy = &dssc_destroy;
676
-
677
- return self;
588
+ Explanation *e = expl_new(self->score(self),"required, optional:");
589
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
590
+ expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
591
+ return e;
678
592
  }
679
593
 
680
- float cdssc_score(Scorer *self)
594
+ static void rossc_destroy(Scorer *self)
681
595
  {
682
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
683
- dssc->coordinator->num_matches += dssc->num_matches;
684
- return dssc->cum_score;
596
+ ReqOptSumScorer *rossc = ROSSc(self);
597
+ if (rossc->req_scorer) {
598
+ rossc->req_scorer->destroy(rossc->req_scorer);
599
+ }
600
+ if (rossc->opt_scorer) {
601
+ rossc->opt_scorer->destroy(rossc->opt_scorer);
602
+ }
603
+ scorer_destroy_i(self);
685
604
  }
686
605
 
687
- Scorer *counting_disjunction_sum_scorer_create(Coordinator *coordinator,
688
- Scorer **sub_scorers, int ss_cnt, int min_num_matches)
606
+
607
+ static Scorer *req_opt_sum_scorer_new(Scorer *req_scorer, Scorer *opt_scorer)
689
608
  {
690
- Scorer *self = disjunction_sum_scorer_create(
691
- sub_scorers, ss_cnt, min_num_matches);
692
- DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
693
- dssc->coordinator = coordinator;
694
- self->score = &cdssc_score;
695
- return self;
609
+ Scorer *self = scorer_new(ReqOptSumScorer, NULL);
610
+
611
+ ROSSc(self)->req_scorer = req_scorer;
612
+ ROSSc(self)->opt_scorer = opt_scorer;
613
+ ROSSc(self)->first_time_opt = true;
614
+
615
+ self->score = &rossc_score;
616
+ self->next = &rossc_next;
617
+ self->skip_to = &rossc_skip_to;
618
+ self->explain = &rossc_explain;
619
+ self->destroy = &rossc_destroy;
620
+
621
+ return self;
696
622
  }
697
623
 
698
624
  /***************************************************************************
699
- * ConjunctionScorer
625
+ * ReqExclScorer
700
626
  ***************************************************************************/
701
627
 
702
- void csc_sort_scorers(ConjunctionScorer *csc)
628
+ #define RXSc(scorer) ((ReqExclScorer *)(scorer))
629
+ typedef struct ReqExclScorer
703
630
  {
704
- qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
705
- csc->first = 0;
706
- csc->last = csc->ss_cnt - 1;
707
- }
631
+ Scorer super;
632
+ Scorer *req_scorer;
633
+ Scorer *excl_scorer;
634
+ bool first_time;
635
+ } ReqExclScorer;
708
636
 
709
- void csc_init(Scorer *self, bool init_scorers)
637
+ static bool rxsc_to_non_excluded(Scorer *self)
710
638
  {
711
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
712
- Scorer *sub_scorer;
713
- int i;
639
+ Scorer *req_scorer = RXSc(self)->req_scorer;
640
+ Scorer *excl_scorer = RXSc(self)->excl_scorer;
641
+ int excl_doc = excl_scorer->doc, req_doc;
714
642
 
715
- /* compute coord factor */
716
- csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
717
-
718
- csc->more = (csc->ss_cnt > 0);
643
+ do {
644
+ /* may be excluded */
645
+ req_doc = req_scorer->doc;
646
+ if (req_doc < excl_doc) {
647
+ /* req_scorer advanced to before excl_scorer, ie. not excluded */
648
+ self->doc = req_doc;
649
+ return true;
650
+ }
651
+ else if (req_doc > excl_doc) {
652
+ if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
653
+ /* emptied, no more exclusions */
654
+ SCORER_NULLIFY(RXSc(self)->excl_scorer);
655
+ self->doc = req_doc;
656
+ return true;
657
+ }
658
+ excl_doc = excl_scorer->doc;
659
+ if (excl_doc > req_doc) {
660
+ self->doc = req_doc;
661
+ return true; /* not excluded */
662
+ }
663
+ }
664
+ } while (req_scorer->next(req_scorer));
665
+ /* emptied, nothing left */
666
+ SCORER_NULLIFY(RXSc(self)->req_scorer);
667
+ return false;
668
+ }
719
669
 
720
- if (init_scorers) {
721
- // move each scorer to its first entry
670
+ static bool rxsc_next(Scorer *self)
671
+ {
672
+ ReqExclScorer *rxsc = RXSc(self);
673
+ Scorer *req_scorer = rxsc->req_scorer;
674
+ Scorer *excl_scorer = rxsc->excl_scorer;
722
675
 
723
- for (i = 0; i < csc->ss_cnt; i++) {
724
- sub_scorer = csc->sub_scorers[i];
725
- if (!csc->more) break;
726
- csc->more = sub_scorer->next(sub_scorer);
676
+ if (rxsc->first_time) {
677
+ if (! excl_scorer->next(excl_scorer)) {
678
+ /* emptied at start */
679
+ SCORER_NULLIFY(rxsc->excl_scorer);
680
+ excl_scorer = NULL;
681
+ }
682
+ rxsc->first_time = false;
683
+ }
684
+ if (req_scorer == NULL) {
685
+ return false;
727
686
  }
728
- if (csc->more) csc_sort_scorers(csc);
729
- }
687
+ if (! req_scorer->next(req_scorer)) {
688
+ /* emptied, nothing left */
689
+ SCORER_NULLIFY(rxsc->req_scorer);
690
+ return false;
691
+ }
692
+ if (excl_scorer == NULL) {
693
+ self->doc = req_scorer->doc;
694
+ /* req_scorer->next() already returned true */
695
+ return true;
696
+ }
697
+ return rxsc_to_non_excluded(self);
698
+ }
699
+
700
+ static bool rxsc_skip_to(Scorer *self, int doc_num)
701
+ {
702
+ ReqExclScorer *rxsc = RXSc(self);
703
+ Scorer *req_scorer = rxsc->req_scorer;
704
+ Scorer *excl_scorer = rxsc->excl_scorer;
730
705
 
731
- csc->first_time = false;
706
+ if (rxsc->first_time) {
707
+ rxsc->first_time = false;
708
+ if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
709
+ /* emptied */
710
+ SCORER_NULLIFY(rxsc->excl_scorer);
711
+ excl_scorer = NULL;
712
+ }
713
+ }
714
+ if (req_scorer == NULL) {
715
+ return false;
716
+ }
717
+ if (excl_scorer == NULL) {
718
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
719
+ self->doc = req_scorer->doc;
720
+ return true;
721
+ }
722
+ return false;
723
+ }
724
+ if (! req_scorer->skip_to(req_scorer, doc_num)) {
725
+ SCORER_NULLIFY(rxsc->req_scorer);
726
+ return false;
727
+ }
728
+ return rxsc_to_non_excluded(self);
732
729
  }
733
730
 
734
- float csc_score(Scorer *self)
731
+ static float rxsc_score(Scorer *self)
735
732
  {
736
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
737
- Scorer *sub_scorer;
738
- float score = 0.0; // sum scores
739
- int i;
740
- for (i = 0; i < csc->ss_cnt; i++) {
741
- sub_scorer = csc->sub_scorers[i];
742
- score += sub_scorer->score(sub_scorer);
743
- }
744
- score *= csc->coord;
745
- return score;
733
+ Scorer *req_scorer = RXSc(self)->req_scorer;
734
+ return req_scorer->score(req_scorer);
746
735
  }
747
736
 
748
- bool csc_do_next(Scorer *self)
737
+ static Explanation *rxsc_explain(Scorer *self, int doc_num)
749
738
  {
750
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
751
- Scorer *first = csc->sub_scorers[csc->first];
752
- Scorer *last = csc->sub_scorers[csc->last];
739
+ ReqExclScorer *rxsc = RXSc(self);
740
+ Scorer *req_scorer = rxsc->req_scorer;
741
+ Scorer *excl_scorer = rxsc->excl_scorer;
742
+ Explanation *e;
753
743
 
754
- // find doc w/ all clauses
755
- while (csc->more && (first->doc < last->doc)) {
756
- csc->more = first->skip_to(first, last->doc); // skip first upto last
757
- // move first to last
758
- csc->last = csc->first;
759
- last = first;
760
- csc->first = (csc->first + 1) % csc->ss_cnt;
761
- first = csc->sub_scorers[csc->first];
762
- }
763
- self->doc = first->doc;
764
- return csc->more;
744
+ if (excl_scorer->skip_to(excl_scorer, doc_num)
745
+ && excl_scorer->doc == doc_num) {
746
+ e = expl_new(0.0, "excluded:");
747
+ }
748
+ else {
749
+ e = expl_new(0.0, "not excluded:");
750
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
751
+ }
752
+ return e;
765
753
  }
766
754
 
767
- bool csc_next(Scorer *self)
755
+ static void rxsc_destroy(Scorer *self)
768
756
  {
769
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
770
- Scorer *sub_scorer;
771
- if (csc->first_time) {
772
- csc_init(self, true);
773
- } else if (csc->more) {
774
- sub_scorer = csc->sub_scorers[csc->last];
775
- csc->more = sub_scorer->next(sub_scorer); // trigger further scanning
776
- }
777
- return csc_do_next(self);
757
+ ReqExclScorer *rxsc = RXSc(self);
758
+ if (rxsc->req_scorer) {
759
+ rxsc->req_scorer->destroy(rxsc->req_scorer);
760
+ }
761
+ if (rxsc->excl_scorer) {
762
+ rxsc->excl_scorer->destroy(rxsc->excl_scorer);
763
+ }
764
+ scorer_destroy_i(self);
778
765
  }
779
766
 
780
- bool csc_skip_to(Scorer *self, int doc_num)
767
+ static Scorer *req_excl_scorer_new(Scorer *req_scorer, Scorer *excl_scorer)
781
768
  {
782
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
783
- Scorer *sub_scorer;
784
- int i;
769
+ Scorer *self = scorer_new(ReqExclScorer, NULL);
770
+ RXSc(self)->req_scorer = req_scorer;
771
+ RXSc(self)->excl_scorer = excl_scorer;
772
+ RXSc(self)->first_time = true;
785
773
 
786
- if (csc->first_time) {
787
- csc_init(self, true);
788
- }
774
+ self->score = &rxsc_score;
775
+ self->next = &rxsc_next;
776
+ self->skip_to = &rxsc_skip_to;
777
+ self->explain = &rxsc_explain;
778
+ self->destroy = &rxsc_destroy;
789
779
 
790
- for (i = 0; i < csc->ss_cnt; i++) {
791
- if (!csc->more) break;
792
- sub_scorer = csc->sub_scorers[i];
793
- csc->more = sub_scorer->skip_to(sub_scorer, doc_num);
794
- }
795
- if (csc->more) csc_sort_scorers(csc); // resort the scorers
796
-
797
- return csc_do_next(self);
780
+ return self;
798
781
  }
799
782
 
800
- void csc_destroy(Scorer *self)
783
+ /***************************************************************************
784
+ * NonMatchScorer
785
+ ***************************************************************************/
786
+
787
+ static float nmsc_score(Scorer *self)
801
788
  {
802
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
803
- int i;
804
- for (i = 0; i < csc->ss_cnt; i++) {
805
- csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
806
- }
807
- free(csc->sub_scorers);
808
- scorer_destroy_i(self);
789
+ (void)self;
790
+ return 0.0;
809
791
  }
810
792
 
811
- Scorer *conjunction_scorer_create(Similarity *similarity)
793
+ static bool nmsc_next(Scorer *self)
812
794
  {
813
- Scorer *self = scorer_create(similarity);
814
- ConjunctionScorer *csc = ALLOC(ConjunctionScorer);
815
- ZEROSET(csc, ConjunctionScorer, 1);
816
- self->data = csc;
817
- csc->first_time = true;
818
- csc->more = true;
819
- csc->coordinator = NULL;
820
-
821
- self->score = &csc_score;
822
- self->next = &csc_next;
823
- self->skip_to = &csc_skip_to;
824
- self->destroy = &csc_destroy;
825
-
826
- return self;
795
+ (void)self;
796
+ return false;
827
797
  }
828
798
 
829
- float ccsc_score(Scorer *self)
799
+ static bool nmsc_skip_to(Scorer *self, int doc_num)
830
800
  {
831
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
832
-
833
- int doc;
834
- if ((doc = self->doc) > csc->last_scored_doc) {
835
- csc->last_scored_doc = doc;
836
- csc->coordinator->num_matches += csc->ss_cnt;
837
- }
838
-
839
- return csc_score(self);
801
+ (void)self; (void)doc_num;
802
+ return false;
840
803
  }
841
804
 
842
- Scorer *counting_conjunction_sum_scorer_create(Coordinator *coordinator,
843
- Scorer **sub_scorers, int ss_cnt)
805
+ static Explanation *nmsc_explain(Scorer *self, int doc_num)
844
806
  {
845
- Scorer *self = conjunction_scorer_create(sim_create_default());
846
- ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
847
- csc->coordinator = coordinator;
848
- csc->last_scored_doc = -1;
849
- csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
850
- memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
851
- csc->ss_capa = csc->ss_cnt = ss_cnt;
807
+ (void)self; (void)doc_num;
808
+ return expl_new(0.0, "No documents matched");
809
+ }
852
810
 
853
- self->score = &ccsc_score;
811
+ static Scorer *non_matching_scorer_new()
812
+ {
813
+ Scorer *self = scorer_new(Scorer, NULL);
814
+ self->score = &nmsc_score;
815
+ self->next = &nmsc_next;
816
+ self->skip_to = &nmsc_skip_to;
817
+ self->explain = &nmsc_explain;
854
818
 
855
- return self;
819
+ return self;
856
820
  }
857
821
 
858
822
  /***************************************************************************
859
- * SingleMatchScorer
823
+ * BooleanScorer
860
824
  ***************************************************************************/
861
825
 
862
- float smsc_score(Scorer *self)
826
+ #define BSc(scorer) ((BooleanScorer *)(scorer))
827
+ typedef struct BooleanScorer
828
+ {
829
+ Scorer super;
830
+ Scorer **required_scorers;
831
+ int rs_cnt;
832
+ int rs_capa;
833
+ Scorer **optional_scorers;
834
+ int os_cnt;
835
+ int os_capa;
836
+ Scorer **prohibited_scorers;
837
+ int ps_cnt;
838
+ int ps_capa;
839
+ Scorer *counting_sum_scorer;
840
+ Coordinator *coordinator;
841
+ } BooleanScorer;
842
+
843
+ static Scorer *counting_sum_scorer_create3(BooleanScorer *bsc,
844
+ Scorer *req_scorer,
845
+ Scorer *opt_scorer)
863
846
  {
864
- SingleMatchScorer *smsc = (SingleMatchScorer *)self->data;
865
- smsc->coordinator->num_matches++;
866
- return smsc->scorer->score(smsc->scorer);
847
+ if (bsc->ps_cnt == 0) {
848
+ /* no prohibited */
849
+ return req_opt_sum_scorer_new(req_scorer, opt_scorer);
850
+ }
851
+ else if (bsc->ps_cnt == 1) {
852
+ /* 1 prohibited */
853
+ return req_opt_sum_scorer_new(
854
+ req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
855
+ opt_scorer);
856
+ }
857
+ else {
858
+ /* more prohibited */
859
+ return req_opt_sum_scorer_new(
860
+ req_excl_scorer_new(
861
+ req_scorer,
862
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
863
+ bsc->ps_cnt, 1)),
864
+ opt_scorer);
865
+ }
867
866
  }
868
867
 
869
- bool smsc_next(Scorer *self)
868
+ static Scorer *counting_sum_scorer_create2(BooleanScorer *bsc,
869
+ Scorer *req_scorer,
870
+ Scorer **optional_scorers,
871
+ int os_cnt)
870
872
  {
871
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
872
- if (scorer->next(scorer)) {
873
- self->doc = scorer->doc;
874
- return true;
875
- }
876
- return false;
873
+ if (os_cnt == 0) {
874
+ if (bsc->ps_cnt == 0) {
875
+ return req_scorer;
876
+ }
877
+ else if (bsc->ps_cnt == 1) {
878
+ return req_excl_scorer_new(req_scorer,
879
+ bsc->prohibited_scorers[0]);
880
+ }
881
+ else {
882
+ /* no optional, more than 1 prohibited */
883
+ return req_excl_scorer_new(
884
+ req_scorer,
885
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
886
+ bsc->ps_cnt, 1));
887
+ }
888
+ }
889
+ else if (os_cnt == 1) {
890
+ return counting_sum_scorer_create3(
891
+ bsc,
892
+ req_scorer,
893
+ single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
894
+ }
895
+ else {
896
+ /* more optional */
897
+ return counting_sum_scorer_create3(
898
+ bsc,
899
+ req_scorer,
900
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
901
+ optional_scorers, os_cnt, 1));
902
+ }
877
903
  }
878
904
 
879
- bool smsc_skip_to(Scorer *self, int doc_num)
905
+ static Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
880
906
  {
881
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
882
- if (scorer->skip_to(scorer, doc_num)) {
883
- self->doc = scorer->doc;
884
- return true;
885
- }
886
- return false;
907
+ if (bsc->rs_cnt == 0) {
908
+ if (bsc->os_cnt == 0) {
909
+ int i;
910
+ /* only prohibited scorers so return non_matching scorer */
911
+ for (i = 0; i < bsc->ps_cnt; i++) {
912
+ bsc->prohibited_scorers[i]->destroy(
913
+ bsc->prohibited_scorers[i]);
914
+ }
915
+ return non_matching_scorer_new();
916
+ }
917
+ else if (bsc->os_cnt == 1) {
918
+ /* the only optional scorer is required */
919
+ return counting_sum_scorer_create2(
920
+ bsc,
921
+ single_match_scorer_new(bsc->coordinator,
922
+ bsc->optional_scorers[0]),
923
+ NULL, 0); /* no optional scorers left */
924
+ }
925
+ else {
926
+ /* more than 1 optional_scorers, no required scorers */
927
+ return counting_sum_scorer_create2(
928
+ bsc,
929
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
930
+ bsc->optional_scorers,
931
+ bsc->os_cnt, 1),
932
+ NULL, 0); /* no optional scorers left */
933
+ }
934
+ }
935
+ else if (bsc->rs_cnt == 1) {
936
+ /* 1 required */
937
+ return counting_sum_scorer_create2(
938
+ bsc,
939
+ single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
940
+ bsc->optional_scorers, bsc->os_cnt);
941
+ }
942
+ else {
943
+ /* more required scorers */
944
+ return counting_sum_scorer_create2(
945
+ bsc,
946
+ counting_conjunction_sum_scorer_new(bsc->coordinator,
947
+ bsc->required_scorers,
948
+ bsc->rs_cnt),
949
+ bsc->optional_scorers, bsc->os_cnt);
950
+ }
887
951
  }
888
952
 
889
- Explanation *smsc_explain(Scorer *self, int doc_num)
953
+ static Scorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
890
954
  {
891
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
892
- return scorer->explain(scorer, doc_num);
955
+ coord_init(bsc->coordinator);
956
+ return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
893
957
  }
894
958
 
895
- void smsc_destroy(Scorer *self)
959
+ static void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
896
960
  {
897
- Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
898
- scorer->destroy(scorer);
899
- scorer_destroy_i(self);
961
+ BooleanScorer *bsc = BSc(self);
962
+ if (occur != BC_MUST_NOT) {
963
+ bsc->coordinator->max_coord++;
964
+ }
965
+
966
+ switch (occur) {
967
+ case BC_MUST:
968
+ RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
969
+ bsc->required_scorers[bsc->rs_cnt++] = scorer;
970
+ break;
971
+ case BC_SHOULD:
972
+ RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
973
+ bsc->optional_scorers[bsc->os_cnt++] = scorer;
974
+ break;
975
+ case BC_MUST_NOT:
976
+ RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
977
+ bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
978
+ break;
979
+ default:
980
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
981
+ ":must_not instead");
982
+ }
900
983
  }
901
984
 
902
- Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
985
+ static float bsc_score(Scorer *self)
903
986
  {
904
- Scorer *self = scorer_create(scorer->similarity);
905
- SingleMatchScorer *smsc = ALLOC(SingleMatchScorer);
906
- smsc->coordinator = coordinator;
907
- smsc->scorer = scorer;
908
- self->data = smsc;
909
-
910
- self->score = &smsc_score;
911
- self->next = &smsc_next;
912
- self->skip_to = &smsc_skip_to;
913
- self->explain = &smsc_explain;
914
- self->destroy = &smsc_destroy;
915
- return self;
987
+ BooleanScorer *bsc = BSc(self);
988
+ Coordinator *coord = bsc->coordinator;
989
+ float sum;
990
+ coord->num_matches = 0;
991
+ sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
992
+ return sum * coord->coord_factors[coord->num_matches];
916
993
  }
917
994
 
918
- /***************************************************************************
919
- * ReqOptSumScorer
920
- ***************************************************************************/
995
+ static bool bsc_next(Scorer *self)
996
+ {
997
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
921
998
 
922
- float rossc_score(Scorer *self)
923
- {
924
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
925
- Scorer *req_scorer = rossc->req_scorer;
926
- Scorer *opt_scorer = rossc->opt_scorer;
927
- int cur_doc = req_scorer->doc;
928
- float req_score = req_scorer->score(req_scorer);
929
-
930
- if (rossc->first_time_opt) {
931
- rossc->first_time_opt = false;
932
- if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
933
- SCORER_NULLIFY(rossc->opt_scorer);
934
- return req_score;
935
- }
936
- } else if (opt_scorer == NULL) {
937
- return req_score;
938
- } else if ((opt_scorer->doc < cur_doc) &&
939
- ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
940
- SCORER_NULLIFY(rossc->opt_scorer);
941
- return req_score;
942
- }
943
- // assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc)
944
- return (opt_scorer->doc == cur_doc)
945
- ? req_score + opt_scorer->score(opt_scorer)
946
- : req_score;
947
- }
948
-
949
- bool rossc_next(Scorer *self)
950
- {
951
- Scorer *req_scorer = ((ReqOptSumScorer *)self->data)->req_scorer;
952
- if (req_scorer->next(req_scorer)) {
953
- self->doc = req_scorer->doc;
954
- return true;
955
- }
956
- return false;
999
+ if (!cnt_sum_sc) {
1000
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1001
+ }
1002
+ if (cnt_sum_sc->next(cnt_sum_sc)) {
1003
+ self->doc = cnt_sum_sc->doc;
1004
+ return true;
1005
+ }
1006
+ else {
1007
+ return false;
1008
+ }
957
1009
  }
958
1010
 
959
- bool rossc_skip_to(Scorer *self, int doc_num)
1011
+ static bool bsc_skip_to(Scorer *self, int doc_num)
960
1012
  {
961
- Scorer *req_scorer = ((ReqOptSumScorer *)self->data)->req_scorer;
962
- if (req_scorer->skip_to(req_scorer, doc_num)) {
963
- self->doc = req_scorer->doc;
964
- return true;
965
- }
966
- return false;
1013
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1014
+
1015
+ if (!BSc(self)->counting_sum_scorer) {
1016
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1017
+ }
1018
+ if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
1019
+ self->doc = cnt_sum_sc->doc;
1020
+ return true;
1021
+ }
1022
+ else {
1023
+ return false;
1024
+ }
967
1025
  }
968
1026
 
969
- Explanation *rossc_explain(Scorer *self, int doc_num)
1027
+ static void bsc_destroy(Scorer *self)
970
1028
  {
971
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
972
- Scorer *req_scorer = rossc->req_scorer;
973
- Scorer *opt_scorer = rossc->opt_scorer;
1029
+ BooleanScorer *bsc = BSc(self);
1030
+ Coordinator *coord = bsc->coordinator;
1031
+
1032
+ free(coord->coord_factors);
1033
+ free(coord);
1034
+
1035
+ if (bsc->counting_sum_scorer) {
1036
+ bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1037
+ }
1038
+ else {
1039
+ int i;
1040
+ for (i = 0; i < bsc->rs_cnt; i++) {
1041
+ bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1042
+ }
1043
+
1044
+ for (i = 0; i < bsc->os_cnt; i++) {
1045
+ bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1046
+ }
974
1047
 
975
- Explanation *e = expl_create(self->score(self), estrdup("required, optional:"));
976
- expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
977
- expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
978
- return e;
1048
+ for (i = 0; i < bsc->ps_cnt; i++) {
1049
+ bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1050
+ }
1051
+ }
1052
+ free(bsc->required_scorers);
1053
+ free(bsc->optional_scorers);
1054
+ free(bsc->prohibited_scorers);
1055
+ scorer_destroy_i(self);
979
1056
  }
980
1057
 
981
- void rossc_destroy(Scorer *self)
1058
+ static Explanation *bsc_explain(Scorer *self, int doc_num)
982
1059
  {
983
- ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
984
- if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
985
- if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
986
- scorer_destroy_i(self);
1060
+ (void)self; (void)doc_num;
1061
+ return expl_new(0.0, "This explanation is not supported");
987
1062
  }
988
1063
 
989
-
990
- Scorer *req_opt_sum_scorer_create(Scorer *req_scorer, Scorer *opt_scorer)
1064
+ static Scorer *bsc_new(Similarity *similarity)
991
1065
  {
992
- Scorer *self = scorer_create(NULL);
993
- ReqOptSumScorer *rossc = ALLOC(ReqOptSumScorer);
994
- self->data = rossc;
995
- rossc->req_scorer = req_scorer;
996
- rossc->opt_scorer = opt_scorer;
997
- rossc->first_time_opt = true;
998
-
999
- self->score = &rossc_score;
1000
- self->next = &rossc_next;
1001
- self->skip_to = &rossc_skip_to;
1002
- self->explain = &rossc_explain;
1003
- self->destroy = &rossc_destroy;
1066
+ Scorer *self = scorer_new(BooleanScorer, similarity);
1067
+ BSc(self)->coordinator = coord_new(similarity);
1068
+ BSc(self)->counting_sum_scorer = NULL;
1004
1069
 
1005
- return self;
1070
+ self->score = &bsc_score;
1071
+ self->next = &bsc_next;
1072
+ self->skip_to = &bsc_skip_to;
1073
+ self->explain = &bsc_explain;
1074
+ self->destroy = &bsc_destroy;
1075
+ return self;
1006
1076
  }
1007
1077
 
1008
1078
  /***************************************************************************
1009
- * ReqExclScorer
1079
+ *
1080
+ * BooleanWeight
1081
+ *
1010
1082
  ***************************************************************************/
1011
1083
 
1012
- bool rxsc_to_non_excluded(Scorer *self)
1013
- {
1014
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1015
- Scorer *req_scorer = rxsc->req_scorer;
1016
- Scorer *excl_scorer = rxsc->excl_scorer;
1017
- int excl_doc = excl_scorer->doc, req_doc;
1018
-
1019
- do {
1020
- req_doc = req_scorer->doc; // may be excluded
1021
- if (req_doc < excl_doc) {
1022
- // req_scorer advanced to before excl_scorer, ie. not excluded
1023
- self->doc = req_doc;
1024
- return true;
1025
- } else if (req_doc > excl_doc) {
1026
- if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
1027
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted, no more exclusions
1028
- self->doc = req_doc;
1029
- return true;
1030
- }
1031
- excl_doc = excl_scorer->doc;
1032
- if (excl_doc > req_doc) {
1033
- self->doc = req_doc;
1034
- return true; // not excluded
1035
- }
1084
+ typedef struct BooleanWeight
1085
+ {
1086
+ Weight w;
1087
+ Weight **weights;
1088
+ int w_cnt;
1089
+ } BooleanWeight;
1090
+
1091
+
1092
+ static float bw_sum_of_squared_weights(Weight *self)
1093
+ {
1094
+ BooleanQuery *bq = BQ(self->query);
1095
+ float sum = 0.0;
1096
+ int i;
1097
+
1098
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1099
+ if (! bq->clauses[i]->is_prohibited) {
1100
+ Weight *weight = BW(self)->weights[i];
1101
+ /* sum sub-weights */
1102
+ sum += weight->sum_of_squared_weights(weight);
1103
+ }
1036
1104
  }
1037
- } while (req_scorer->next(req_scorer));
1038
- SCORER_NULLIFY(rxsc->req_scorer); // exhausted, nothing left
1039
- return false;
1105
+
1106
+ /* boost each sub-weight */
1107
+ sum *= self->value * self->value;
1108
+ return sum;
1040
1109
  }
1041
1110
 
1042
- bool rxsc_next(Scorer *self)
1111
+ static void bw_normalize(Weight *self, float normalization_factor)
1043
1112
  {
1044
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1045
- Scorer *req_scorer = rxsc->req_scorer;
1046
- Scorer *excl_scorer = rxsc->excl_scorer;
1113
+ BooleanQuery *bq = BQ(self->query);
1114
+ int i;
1047
1115
 
1048
- if (rxsc->first_time) {
1049
- if (! excl_scorer->next(excl_scorer)) {
1050
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted at start
1051
- excl_scorer = NULL;
1116
+ normalization_factor *= self->value; /* multiply by query boost */
1117
+
1118
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1119
+ if (! bq->clauses[i]->is_prohibited) {
1120
+ Weight *weight = BW(self)->weights[i];
1121
+ /* sum sub-weights */
1122
+ weight->normalize(weight, normalization_factor);
1123
+ }
1052
1124
  }
1053
- rxsc->first_time = false;
1054
- }
1055
- if (req_scorer == NULL) {
1056
- return false;
1057
- }
1058
- if (! req_scorer->next(req_scorer)) {
1059
- SCORER_NULLIFY(rxsc->req_scorer); // exhausted, nothing left
1060
- return false;
1061
- }
1062
- if (excl_scorer == NULL) {
1063
- self->doc = req_scorer->doc;
1064
- return true; // req_scorer->next() already returned true
1065
- }
1066
- return rxsc_to_non_excluded(self);
1067
1125
  }
1068
1126
 
1069
- bool rxsc_skip_to(Scorer *self, int doc_num)
1127
+ static Scorer *bw_scorer(Weight *self, IndexReader *ir)
1070
1128
  {
1071
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1072
- Scorer *req_scorer = rxsc->req_scorer;
1073
- Scorer *excl_scorer = rxsc->excl_scorer;
1129
+ Scorer *bsc = bsc_new(self->similarity);
1130
+ BooleanQuery *bq = BQ(self->query);
1131
+ int i;
1074
1132
 
1075
- if (rxsc->first_time) {
1076
- rxsc->first_time = false;
1077
- if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
1078
- SCORER_NULLIFY(rxsc->excl_scorer); // exhausted
1079
- excl_scorer = NULL;
1080
- }
1081
- }
1082
- if (req_scorer == NULL) {
1083
- return false;
1084
- }
1085
- if (excl_scorer == NULL) {
1086
- if (req_scorer->skip_to(req_scorer, doc_num)) {
1087
- self->doc = req_scorer->doc;
1088
- return true;
1133
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1134
+ BooleanClause *clause = bq->clauses[i];
1135
+ Weight *weight = BW(self)->weights[i];
1136
+ Scorer *sub_scorer = weight->scorer(weight, ir);
1137
+ if (sub_scorer) {
1138
+ bsc_add_scorer(bsc, sub_scorer, clause->occur);
1139
+ }
1140
+ else if (clause->is_required) {
1141
+ bsc->destroy(bsc);
1142
+ return NULL;
1143
+ }
1089
1144
  }
1090
- return false;
1091
- }
1092
- if (! req_scorer->skip_to(req_scorer, doc_num)) {
1093
- SCORER_NULLIFY(rxsc->req_scorer);
1094
- return false;
1095
- }
1096
- return rxsc_to_non_excluded(self);
1145
+
1146
+ return bsc;
1097
1147
  }
1098
1148
 
1099
- float rxsc_score(Scorer *self)
1149
+ static char *bw_to_s(Weight *self)
1100
1150
  {
1101
- Scorer *req_scorer = ((ReqExclScorer *)self->data)->req_scorer;
1102
- return req_scorer->score(req_scorer);
1151
+ return strfmt("BooleanWeight(%f)", self->value);
1103
1152
  }
1104
1153
 
1105
- Explanation *rxsc_explain(Scorer *self, int doc_num)
1154
+ static void bw_destroy(Weight *self)
1106
1155
  {
1107
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1108
- Scorer *req_scorer = rxsc->req_scorer;
1109
- Scorer *excl_scorer = rxsc->excl_scorer;
1156
+ int i;
1110
1157
 
1111
- Explanation *e;
1112
- if (excl_scorer->skip_to(excl_scorer, doc_num) && excl_scorer->doc == doc_num) {
1113
- e = expl_create(0.0, estrdup("excluded:"));
1114
- } else {
1115
- e = expl_create(0.0, estrdup("not excluded:"));
1116
- expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
1117
- }
1118
- return e;
1158
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1159
+ BW(self)->weights[i]->destroy(BW(self)->weights[i]);
1160
+ }
1161
+
1162
+ free(BW(self)->weights);
1163
+ w_destroy(self);
1119
1164
  }
1120
1165
 
1121
- void rxsc_destroy(Scorer *self)
1166
+ static Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
1122
1167
  {
1123
- ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
1124
- if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
1125
- if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
1126
- scorer_destroy_i(self);
1168
+ BooleanQuery *bq = BQ(self->query);
1169
+ Explanation *sum_expl = expl_new(0.0, "sum of:");
1170
+ Explanation *explanation;
1171
+ int coord = 0;
1172
+ int max_coord = 0;
1173
+ float coord_factor = 0.0;
1174
+ float sum = 0.0;
1175
+ int i;
1176
+
1177
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1178
+ Weight *weight = BW(self)->weights[i];
1179
+ BooleanClause *clause = bq->clauses[i];
1180
+ explanation = weight->explain(weight, ir, doc_num);
1181
+ if (!clause->is_prohibited) {
1182
+ max_coord++;
1183
+ }
1184
+ if (explanation->value > 0.0) {
1185
+ if (!clause->is_prohibited) {
1186
+ expl_add_detail(sum_expl, explanation);
1187
+ sum += explanation->value;
1188
+ coord++;
1189
+ }
1190
+ else {
1191
+ expl_destroy(explanation);
1192
+ expl_destroy(sum_expl);
1193
+ return expl_new(0.0, "match prohibited");
1194
+ }
1195
+ }
1196
+ else if (clause->is_required) {
1197
+ expl_destroy(explanation);
1198
+ expl_destroy(sum_expl);
1199
+ return expl_new(0.0, "match required");
1200
+ }
1201
+ else {
1202
+ expl_destroy(explanation);
1203
+ }
1204
+ }
1205
+ sum_expl->value = sum;
1206
+
1207
+ if (coord == 1) { /* only one clause matched */
1208
+ explanation = sum_expl; /* eliminate wrapper */
1209
+ ary_size(sum_expl->details) = 0;
1210
+ sum_expl = sum_expl->details[0];
1211
+ expl_destroy(explanation);
1212
+ }
1213
+
1214
+ coord_factor = sim_coord(self->similarity, coord, max_coord);
1215
+
1216
+ if (coord_factor == 1.0) { /* coord is no-op */
1217
+ return sum_expl; /* eliminate wrapper */
1218
+ }
1219
+ else {
1220
+ explanation = expl_new(sum * coord_factor, "product of:");
1221
+ expl_add_detail(explanation, sum_expl);
1222
+ expl_add_detail(explanation, expl_new(coord_factor, "coord(%d/%d)",
1223
+ coord, max_coord));
1224
+ return explanation;
1225
+ }
1127
1226
  }
1128
1227
 
1129
- Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
1228
+ static Weight *bw_new(Query *query, Searcher *searcher)
1130
1229
  {
1131
- Scorer *self = scorer_create(NULL);
1132
- ReqExclScorer *rxsc = ALLOC(ReqExclScorer);
1133
- self->data = rxsc;
1134
- rxsc->req_scorer = req_scorer;
1135
- rxsc->excl_scorer = excl_scorer;
1136
- rxsc->first_time = true;
1137
-
1138
- self->score = &rxsc_score;
1139
- self->next = &rxsc_next;
1140
- self->skip_to = &rxsc_skip_to;
1141
- self->explain = &rxsc_explain;
1142
- self->destroy = &rxsc_destroy;
1230
+ int i;
1231
+ Weight *self = w_new(BooleanWeight, query);
1232
+
1233
+ BW(self)->w_cnt = BQ(query)->clause_cnt;
1234
+ BW(self)->weights = ALLOC_N(Weight *, BW(self)->w_cnt);
1235
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1236
+ BW(self)->weights[i] = q_weight(BQ(query)->clauses[i]->query, searcher);
1237
+ }
1143
1238
 
1144
- return self;
1239
+ self->normalize = &bw_normalize;
1240
+ self->scorer = &bw_scorer;
1241
+ self->explain = &bw_explain;
1242
+ self->to_s = &bw_to_s;
1243
+ self->destroy = &bw_destroy;
1244
+ self->sum_of_squared_weights = &bw_sum_of_squared_weights;
1245
+
1246
+ self->similarity = query->get_similarity(query, searcher);
1247
+ self->value = query->boost;
1248
+
1249
+ return self;
1145
1250
  }
1146
1251
 
1147
1252
  /***************************************************************************
1148
- * NonMatchScorer
1253
+ *
1254
+ * BooleanClause
1255
+ *
1149
1256
  ***************************************************************************/
1150
1257
 
1151
- float nmsc_score(Scorer *self)
1152
- {
1153
- return 0.0;
1258
+ void bc_set_occur(BooleanClause *self, enum BC_TYPE occur)
1259
+ {
1260
+ self->occur = occur;
1261
+ switch (occur) {
1262
+ case BC_SHOULD:
1263
+ self->is_prohibited = false;
1264
+ self->is_required = false;
1265
+ break;
1266
+ case BC_MUST:
1267
+ self->is_prohibited = false;
1268
+ self->is_required = true;
1269
+ break;
1270
+ case BC_MUST_NOT:
1271
+ self->is_prohibited = true;
1272
+ self->is_required = false;
1273
+ break;
1274
+ default:
1275
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
1276
+ ":must or :must_not instead");
1277
+ }
1154
1278
  }
1155
1279
 
1156
- bool nmsc_next(Scorer *self)
1280
+ void bc_deref(BooleanClause *self)
1157
1281
  {
1158
- return false;
1282
+ if (--self->ref_cnt <= 0) {
1283
+ q_deref(self->query);
1284
+ free(self);
1285
+ }
1159
1286
  }
1160
1287
 
1161
- bool nmsc_skip_to(Scorer *self, int doc_num)
1288
+ static ulong bc_hash(BooleanClause *self)
1162
1289
  {
1163
- return false;
1290
+ return ((q_hash(self->query) << 2) | self->occur);
1164
1291
  }
1165
1292
 
1166
- Explanation *nmsc_explain(Scorer *self, int doc_num)
1293
+ static int bc_eq(BooleanClause *self, BooleanClause *o)
1167
1294
  {
1168
- return expl_create(0.0, estrdup("No documents matched"));
1295
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
1169
1296
  }
1170
1297
 
1171
- Scorer *non_matching_scorer_create()
1298
+ BooleanClause *bc_new(Query *query, enum BC_TYPE occur)
1172
1299
  {
1173
- Scorer *self = scorer_create(NULL);
1174
- self->score = &nmsc_score;
1175
- self->next = &nmsc_next;
1176
- self->skip_to = &nmsc_skip_to;
1177
- self->explain = &nmsc_explain;
1178
-
1179
- return self;
1300
+ BooleanClause *self = ALLOC(BooleanClause);
1301
+ self->ref_cnt = 1;
1302
+ self->query = query;
1303
+ bc_set_occur(self, occur);
1304
+ return self;
1180
1305
  }
1181
1306
 
1182
-
1183
1307
  /***************************************************************************
1184
- * BooleanScorer
1308
+ *
1309
+ * BooleanQuery
1310
+ *
1185
1311
  ***************************************************************************/
1186
1312
 
1187
- Scorer *counting_sum_scorer_create3(BooleanScorer *bsc, Scorer *req_scorer,
1188
- Scorer *opt_scorer)
1313
+ static MatchVector *bq_get_matchv_i(Query *self, MatchVector *mv,
1314
+ TermVector *tv)
1189
1315
  {
1190
- if (bsc->ps_cnt == 0) { // no prohibited
1191
- return req_opt_sum_scorer_create(req_scorer, opt_scorer);
1192
- } else if (bsc->ps_cnt == 1) { // 1 prohibited
1193
- return req_opt_sum_scorer_create(
1194
- req_excl_scorer_create(req_scorer, bsc->prohibited_scorers[0]),
1195
- opt_scorer);
1196
- } else { // more prohibited
1197
- return req_opt_sum_scorer_create(
1198
- req_excl_scorer_create(req_scorer,
1199
- disjunction_sum_scorer_create(bsc->prohibited_scorers, bsc->ps_cnt, 1)),
1200
- opt_scorer);
1201
- }
1316
+ int i;
1317
+ for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
1318
+ if (BQ(self)->clauses[i]->occur != BC_MUST_NOT) {
1319
+ Query *q = BQ(self)->clauses[i]->query;
1320
+ q->get_matchv_i(q, mv, tv);
1321
+ }
1322
+ }
1323
+ return mv;
1202
1324
  }
1203
1325
 
1204
- Scorer *counting_sum_scorer_create2(BooleanScorer *bsc, Scorer *req_scorer,
1205
- Scorer **optional_scorers, int os_cnt)
1326
+ static Query *bq_rewrite(Query *self, IndexReader *ir)
1206
1327
  {
1207
- if (os_cnt == 0) {
1208
- if (bsc->ps_cnt == 0) {
1209
- return req_scorer;
1210
- } else if (bsc->ps_cnt == 1) {
1211
- return req_excl_scorer_create(req_scorer,
1212
- bsc->prohibited_scorers[0]);
1213
- } else { // no optional, more than 1 prohibited
1214
- return req_excl_scorer_create(req_scorer,
1215
- disjunction_sum_scorer_create(bsc->prohibited_scorers, bsc->ps_cnt, 1));
1216
- }
1217
- } else if (os_cnt == 1) {
1218
- return counting_sum_scorer_create3(
1219
- bsc,
1220
- req_scorer,
1221
- single_match_scorer_create(bsc->coordinator, optional_scorers[0]));
1222
- } else { // more optional
1223
- return counting_sum_scorer_create3(
1224
- bsc,
1225
- req_scorer,
1226
- counting_disjunction_sum_scorer_create(bsc->coordinator,
1227
- optional_scorers, os_cnt, 1));
1228
- }
1229
- }
1230
-
1231
- Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
1232
- {
1233
- if (bsc->rs_cnt == 0) {
1234
- if (bsc->os_cnt == 0) {
1235
- int i;
1236
- // only prohibited_scorers so free them and return non_matching scorer
1237
- for (i = 0; i < bsc->ps_cnt; i++) {
1238
- bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1239
- }
1240
- return non_matching_scorer_create();
1241
- } else if (bsc->os_cnt == 1) {
1242
- return counting_sum_scorer_create2( // the only optional scorer is required
1243
- bsc,
1244
- single_match_scorer_create(bsc->coordinator, bsc->optional_scorers[0]),
1245
- NULL, 0); // no optional scorers left
1246
- } else { // more than 1 @optional_scorers, no required scorers
1247
- return counting_sum_scorer_create2( // at least one optional scorer is required
1248
- bsc,
1249
- counting_disjunction_sum_scorer_create(bsc->coordinator,
1250
- bsc->optional_scorers, bsc->os_cnt, 1),
1251
- NULL, 0); // no optional scorers left
1252
- }
1253
- } else if (bsc->rs_cnt == 1) { // 1 required
1254
- return counting_sum_scorer_create2(
1255
- bsc,
1256
- single_match_scorer_create(bsc->coordinator, bsc->required_scorers[0]),
1257
- bsc->optional_scorers, bsc->os_cnt);
1258
- } else {// more required scorers
1259
- return counting_sum_scorer_create2(
1260
- bsc,
1261
- counting_conjunction_sum_scorer_create(bsc->coordinator,
1262
- bsc->required_scorers, bsc->rs_cnt),
1263
- bsc->optional_scorers, bsc->os_cnt);
1264
- }
1265
- }
1266
-
1267
- void bsc_init_counting_sum_scorer(BooleanScorer *bsc)
1268
- {
1269
- coord_init(bsc->coordinator);
1270
- bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
1271
- }
1272
-
1273
- void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
1274
- {
1275
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1276
- if (occur != BC_MUST_NOT) {
1277
- bsc->coordinator->max_coord++;
1278
- }
1279
-
1280
- switch (occur) {
1281
- case BC_MUST:
1282
- RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
1283
- bsc->required_scorers[bsc->rs_cnt++] = scorer;
1284
- break;
1285
- case BC_SHOULD:
1286
- RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
1287
- bsc->optional_scorers[bsc->os_cnt++] = scorer;
1288
- break;
1289
- case BC_MUST_NOT:
1290
- RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
1291
- bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
1292
- break;
1293
- default:
1294
- RAISE(ARG_ERROR, UNKNOWN_OCCUR_VAL_ERROR_MSG);
1295
- }
1296
- }
1297
-
1298
- float bsc_score(Scorer *self)
1299
- {
1300
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1301
- Coordinator *coord = bsc->coordinator;
1302
- float sum;
1303
- coord->num_matches = 0;
1304
- sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
1305
- return sum * coord->coord_factors[coord->num_matches];
1306
- }
1307
-
1308
- bool bsc_next(Scorer *self)
1309
- {
1310
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1311
-
1312
- if (!bsc->counting_sum_scorer) {
1313
- bsc_init_counting_sum_scorer(bsc);
1314
- }
1315
- if (bsc->counting_sum_scorer->next(bsc->counting_sum_scorer)) {
1316
- self->doc = bsc->counting_sum_scorer->doc;
1317
- return true;
1318
- } else {
1319
- return false;
1320
- }
1328
+ int i;
1329
+
1330
+ bool rewritten = false;
1331
+
1332
+ if (BQ(self)->clause_cnt == 1) {
1333
+ /* optimize 1-clause queries */
1334
+ BooleanClause *clause = BQ(self)->clauses[0];
1335
+ if (! clause->is_prohibited) {
1336
+ /* just return clause. Re-write first. */
1337
+ Query *q = clause->query->rewrite(clause->query, ir);
1338
+
1339
+ if (self->boost != 1.0) {
1340
+ /* original_boost is initialized to 0.0. If it has been set to
1341
+ * something else it means this query has already been boosted
1342
+ * before so boost from the original value */
1343
+ if ((q == clause->query) && BQ(self)->original_boost) {
1344
+ /* rewrite was no-op */
1345
+ q->boost = BQ(self)->original_boost * self->boost;
1346
+ }
1347
+ else {
1348
+ /* save original boost in case query is rewritten again */
1349
+ BQ(self)->original_boost = q->boost;
1350
+ q->boost *= self->boost;
1351
+ }
1352
+ }
1353
+
1354
+ return q;
1355
+ }
1356
+ }
1357
+
1358
+ self->ref_cnt++;
1359
+ /* replace each clause's query with its rewritten query */
1360
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1361
+ BooleanClause *clause = BQ(self)->clauses[i];
1362
+ Query *rq = clause->query->rewrite(clause->query, ir);
1363
+ if (rq != clause->query) {
1364
+ if (!rewritten) {
1365
+ int j;
1366
+ Query *new_self = q_new(BooleanQuery);
1367
+ memcpy(new_self, self, sizeof(BooleanQuery));
1368
+ BQ(new_self)->clauses = ALLOC_N(BooleanClause *, BQ(self)->clause_capa);
1369
+ memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
1370
+ BQ(self)->clause_capa * sizeof(BooleanClause *));
1371
+ for (j = 0; j < BQ(self)->clause_cnt; j++) {
1372
+ REF(BQ(self)->clauses[j]);
1373
+ }
1374
+ self->ref_cnt--;
1375
+ self = new_self;
1376
+ self->ref_cnt = 1;
1377
+ rewritten = true;
1378
+ }
1379
+ DEREF(clause);
1380
+ BQ(self)->clauses[i] = bc_new(rq, clause->occur);
1381
+ } else {
1382
+ DEREF(rq);
1383
+ }
1384
+ }
1385
+
1386
+ return self;
1387
+ }
1388
+
1389
+ static void bq_extract_terms(Query *self, HashSet *terms)
1390
+ {
1391
+ int i;
1392
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1393
+ BooleanClause *clause = BQ(self)->clauses[i];
1394
+ clause->query->extract_terms(clause->query, terms);
1395
+ }
1321
1396
  }
1322
1397
 
1323
- bool bsc_skip_to(Scorer *self, int doc_num)
1398
+ static char *bq_to_s(Query *self, const char *field)
1324
1399
  {
1325
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1400
+ int i;
1401
+ BooleanClause *clause;
1402
+ Query *sub_query;
1403
+ char *buffer;
1404
+ char *clause_str;
1405
+ int bp = 0;
1406
+ int size = QUERY_STRING_START_SIZE;
1407
+ int needed;
1408
+ int clause_len;
1409
+
1410
+ buffer = ALLOC_N(char, size);
1411
+ if (self->boost != 1.0) {
1412
+ buffer[0] = '(';
1413
+ bp++;
1414
+ }
1326
1415
 
1327
- if (!bsc->counting_sum_scorer) {
1328
- bsc_init_counting_sum_scorer(bsc);
1329
- }
1330
- if (bsc->counting_sum_scorer->skip_to(bsc->counting_sum_scorer, doc_num)) {
1331
- self->doc = bsc->counting_sum_scorer->doc;
1332
- return true;
1333
- } else {
1334
- return false;
1335
- }
1416
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1417
+ clause = BQ(self)->clauses[i];
1418
+ clause_str = clause->query->to_s(clause->query, field);
1419
+ clause_len = (int)strlen(clause_str);
1420
+ needed = clause_len + 5;
1421
+ while ((size - bp) < needed) {
1422
+ size *= 2;
1423
+ REALLOC_N(buffer, char, size);
1424
+ }
1425
+
1426
+ if (i > 0) {
1427
+ buffer[bp++] = ' ';
1428
+ }
1429
+ if (clause->is_prohibited) {
1430
+ buffer[bp++] = '-';
1431
+ }
1432
+ else if (clause->is_required) {
1433
+ buffer[bp++] = '+';
1434
+ }
1435
+
1436
+ sub_query = clause->query;
1437
+ if (sub_query->type == BOOLEAN_QUERY) {
1438
+ /* wrap sub-bools in parens */
1439
+ buffer[bp++] = '(';
1440
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1441
+ bp += clause_len;
1442
+ buffer[bp++] = ')';
1443
+ }
1444
+ else {
1445
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1446
+ bp += clause_len;
1447
+ }
1448
+ free(clause_str);
1449
+ }
1450
+
1451
+ if (self->boost != 1.0) {
1452
+ char *boost_str = strfmt(")^%f", self->boost);
1453
+ int boost_len = (int)strlen(boost_str);
1454
+ REALLOC_N(buffer, char, bp + boost_len + 1);
1455
+ memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
1456
+ bp += boost_len;
1457
+ free(boost_str);
1458
+ }
1459
+ buffer[bp] = 0;
1460
+ return buffer;
1461
+ }
1462
+
1463
+ static void bq_destroy(Query *self)
1464
+ {
1465
+ int i;
1466
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1467
+ bc_deref(BQ(self)->clauses[i]);
1468
+ }
1469
+ free(BQ(self)->clauses);
1470
+ if (BQ(self)->similarity) {
1471
+ BQ(self)->similarity->destroy(BQ(self)->similarity);
1472
+ }
1473
+ q_destroy_i(self);
1474
+ }
1475
+
1476
+ static float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
1477
+ {
1478
+ (void)sim; (void)overlap; (void)max_overlap;
1479
+ return 1.0;
1336
1480
  }
1337
1481
 
1338
- void bsc_destroy(Scorer *self)
1482
+ static Similarity *bq_get_similarity(Query *self, Searcher *searcher)
1339
1483
  {
1340
- BooleanScorer *bsc = (BooleanScorer *)self->data;
1341
- Coordinator *coord = bsc->coordinator;
1484
+ if (!BQ(self)->similarity) {
1485
+ Similarity *sim = q_get_similarity_i(self, searcher);
1486
+ BQ(self)->similarity = ALLOC(Similarity);
1487
+ memcpy(BQ(self)->similarity, sim, sizeof(Similarity));
1488
+ BQ(self)->similarity->coord = &bq_coord_disabled;
1489
+ BQ(self)->similarity->destroy = (void (*)(Similarity *))&free;
1490
+ }
1342
1491
 
1343
- free(coord->coord_factors);
1344
- free(coord);
1492
+ return BQ(self)->similarity;
1493
+ }
1345
1494
 
1346
- if (bsc->counting_sum_scorer) {
1347
- bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1348
- } else {
1495
+ static ulong bq_hash(Query *self)
1496
+ {
1349
1497
  int i;
1350
- for (i = 0; i < bsc->rs_cnt; i++) {
1351
- bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1498
+ ulong hash = 0;
1499
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1500
+ hash ^= bc_hash(BQ(self)->clauses[i]);
1352
1501
  }
1353
-
1354
- for (i = 0; i < bsc->os_cnt; i++) {
1355
- bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1502
+ return (hash << 1) | BQ(self)->coord_disabled;
1503
+ }
1504
+
1505
+ static int bq_eq(Query *self, Query *o)
1506
+ {
1507
+ int i;
1508
+ BooleanQuery *bq1 = BQ(self);
1509
+ BooleanQuery *bq2 = BQ(o);
1510
+ if ((bq1->coord_disabled != bq2->coord_disabled)
1511
+ || (bq1->max_clause_cnt != bq2->max_clause_cnt)
1512
+ || (bq1->clause_cnt != bq2->clause_cnt)) {
1513
+ return false;
1356
1514
  }
1357
1515
 
1358
- for (i = 0; i < bsc->ps_cnt; i++) {
1359
- bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1516
+ for (i = 0; i < bq1->clause_cnt; i++) {
1517
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
1518
+ return false;
1519
+ }
1360
1520
  }
1361
- }
1362
- free(bsc->required_scorers);
1363
- free(bsc->optional_scorers);
1364
- free(bsc->prohibited_scorers);
1365
- scorer_destroy_i(self);
1521
+ return true;
1366
1522
  }
1367
1523
 
1368
- Explanation *bsc_explain(Scorer *self, int doc_num)
1524
+ Query *bq_new(bool coord_disabled)
1369
1525
  {
1370
- return expl_create(0.0, estrdup("This explanation is not supported"));
1526
+ Query *self = q_new(BooleanQuery);
1527
+ BQ(self)->coord_disabled = coord_disabled;
1528
+ if (coord_disabled) {
1529
+ self->get_similarity = &bq_get_similarity;
1530
+ }
1531
+ BQ(self)->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
1532
+ BQ(self)->clause_cnt = 0;
1533
+ BQ(self)->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
1534
+ BQ(self)->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
1535
+ BQ(self)->similarity = NULL;
1536
+ BQ(self)->original_boost = 0.0;
1537
+
1538
+ self->type = BOOLEAN_QUERY;
1539
+ self->rewrite = &bq_rewrite;
1540
+ self->extract_terms = &bq_extract_terms;
1541
+ self->to_s = &bq_to_s;
1542
+ self->hash = &bq_hash;
1543
+ self->eq = &bq_eq;
1544
+ self->destroy_i = &bq_destroy;
1545
+ self->create_weight_i = &bw_new;
1546
+ self->get_matchv_i = &bq_get_matchv_i;
1547
+
1548
+ return self;
1549
+ }
1550
+
1551
+ BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
1552
+ {
1553
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1554
+ BQ(self)->clause_capa *= 2;
1555
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1556
+ }
1557
+ if (BQ(self)->clause_cnt > BQ(self)->max_clause_cnt) {
1558
+ RAISE(STATE_ERROR, "Too many clauses. The max clause limit is set to "
1559
+ "<%d> but your query has <%d> clauses. You can try increasing "
1560
+ ":max_clause_count for the BooleanQuery or using a different "
1561
+ "type of query.", BQ(self)->max_clause_cnt, BQ(self)->clause_cnt);
1562
+ }
1563
+ BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1564
+ BQ(self)->clause_cnt++;
1565
+ return bc;
1371
1566
  }
1372
1567
 
1373
- Scorer *bsc_create(Similarity *similarity)
1568
+ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
1374
1569
  {
1375
- Scorer *self = scorer_create(similarity);
1376
- BooleanScorer *bsc = ALLOC(BooleanScorer);
1377
- ZEROSET(bsc, BooleanScorer, 1);
1378
- bsc->coordinator = coord_create(similarity);
1379
- bsc->counting_sum_scorer = NULL;
1380
- self->data = bsc;
1570
+ REF(bc);
1571
+ return bq_add_clause_nr(self, bc);
1572
+ }
1381
1573
 
1382
- self->score = &bsc_score;
1383
- self->next = &bsc_next;
1384
- self->skip_to = &bsc_skip_to;
1385
- self->explain = &bsc_explain;
1386
- self->destroy = &bsc_destroy;
1387
- return self;
1574
+ BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur)
1575
+ {
1576
+ BooleanClause *bc = bc_new(sub_query, occur);
1577
+ bq_add_clause(self, bc);
1578
+ bc_deref(bc); /* bc would have been referenced unnecessarily */
1579
+ return bc;
1388
1580
  }
1389
1581
 
1582
+ BooleanClause *bq_add_query(Query *self, Query *sub_query, enum BC_TYPE occur)
1583
+ {
1584
+ REF(sub_query);
1585
+ return bq_add_query_nr(self, sub_query, occur);
1586
+ }
1390
1587