isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,1612 @@
1
+ #include <string.h>
2
+ #include "frt_search.h"
3
+ #include "frt_array.h"
4
+
5
/* Cast helpers: reinterpret a generic pointer as the boolean-specific
 * query / weight type used in this file. */
#define BQ(q) ((FrtBooleanQuery *)(q))
#define BW(w) ((BooleanWeight *)(w))
7
+
8
+ /***************************************************************************
9
+ *
10
+ * BooleanScorer
11
+ *
12
+ ***************************************************************************/
13
+
14
+ /***************************************************************************
15
+ * Coordinator
16
+ ***************************************************************************/
17
+
18
+ typedef struct Coordinator
19
+ {
20
+ int max_coord;
21
+ float *coord_factors;
22
+ FrtSimilarity *similarity;
23
+ int num_matches;
24
+ } Coordinator;
25
+
26
+ static Coordinator *coord_new(FrtSimilarity *similarity)
27
+ {
28
+ Coordinator *self = FRT_ALLOC_AND_ZERO(Coordinator);
29
+ self->similarity = similarity;
30
+ return self;
31
+ }
32
+
33
+ static Coordinator *coord_init(Coordinator *self)
34
+ {
35
+ int i;
36
+ self->coord_factors = FRT_ALLOC_N(float, self->max_coord + 1);
37
+
38
+ for (i = 0; i <= self->max_coord; i++) {
39
+ self->coord_factors[i]
40
+ = frt_sim_coord(self->similarity, i, self->max_coord);
41
+ }
42
+
43
+ return self;
44
+ }
45
+
46
+ /***************************************************************************
47
+ * DisjunctionSumScorer
48
+ ***************************************************************************/
49
+
50
+ #define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
51
+
52
+ typedef struct DisjunctionSumScorer
53
+ {
54
+ FrtScorer super;
55
+ float cum_score;
56
+ int num_matches;
57
+ int min_num_matches;
58
+ FrtScorer **sub_scorers;
59
+ int ss_cnt;
60
+ FrtPriorityQueue *scorer_queue;
61
+ Coordinator *coordinator;
62
+ } DisjunctionSumScorer;
63
+
64
+ static float dssc_score(FrtScorer *self)
65
+ {
66
+ return DSSc(self)->cum_score;
67
+ }
68
+
69
+ static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
70
+ {
71
+ int i;
72
+ FrtScorer *sub_scorer;
73
+ FrtPriorityQueue *pq = dssc->scorer_queue
74
+ = frt_pq_new(dssc->ss_cnt, (frt_lt_ft)&frt_scorer_doc_less_than, NULL);
75
+
76
+ for (i = 0; i < dssc->ss_cnt; i++) {
77
+ sub_scorer = dssc->sub_scorers[i];
78
+ if (sub_scorer->next(sub_scorer)) {
79
+ frt_pq_insert(pq, sub_scorer);
80
+ }
81
+ }
82
+ }
83
+
84
+ static bool dssc_advance_after_current(FrtScorer *self)
85
+ {
86
+ DisjunctionSumScorer *dssc = DSSc(self);
87
+ FrtPriorityQueue *scorer_queue = dssc->scorer_queue;
88
+
89
+ /* repeat until minimum number of matches is found */
90
+ while (true) {
91
+ FrtScorer *top = (FrtScorer *)frt_pq_top(scorer_queue);
92
+ self->doc = top->doc;
93
+ dssc->cum_score = top->score(top);
94
+ dssc->num_matches = 1;
95
+ /* Until all sub-scorers are after self->doc */
96
+ while (true) {
97
+ if (top->next(top)) {
98
+ frt_pq_down(scorer_queue);
99
+ }
100
+ else {
101
+ frt_pq_pop(scorer_queue);
102
+ if (scorer_queue->size
103
+ < (dssc->min_num_matches - dssc->num_matches)) {
104
+ /* Not enough subscorers left for a match on this
105
+ * document, also no more chance of any further match */
106
+ return false;
107
+ }
108
+ if (scorer_queue->size == 0) {
109
+ /* nothing more to advance, check for last match. */
110
+ break;
111
+ }
112
+ }
113
+ top = (FrtScorer *)frt_pq_top(scorer_queue);
114
+ if (top->doc != self->doc) {
115
+ /* All remaining subscorers are after self->doc */
116
+ break;
117
+ }
118
+ else {
119
+ dssc->cum_score += top->score(top);
120
+ dssc->num_matches++;
121
+ }
122
+ }
123
+
124
+ if (dssc->num_matches >= dssc->min_num_matches) {
125
+ return true;
126
+ }
127
+ else if (scorer_queue->size < dssc->min_num_matches) {
128
+ return false;
129
+ }
130
+ }
131
+ }
132
+
133
+ static bool dssc_next(FrtScorer *self)
134
+ {
135
+ if (DSSc(self)->scorer_queue == NULL) {
136
+ dssc_init_scorer_queue(DSSc(self));
137
+ }
138
+
139
+ if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
140
+ return false;
141
+ }
142
+ else {
143
+ return dssc_advance_after_current(self);
144
+ }
145
+ }
146
+
147
+ static bool dssc_skip_to(FrtScorer *self, int doc_num)
148
+ {
149
+ DisjunctionSumScorer *dssc = DSSc(self);
150
+ FrtPriorityQueue *scorer_queue = dssc->scorer_queue;
151
+
152
+ if (scorer_queue == NULL) {
153
+ dssc_init_scorer_queue(dssc);
154
+ scorer_queue = dssc->scorer_queue;
155
+ }
156
+
157
+ if (scorer_queue->size < dssc->min_num_matches) {
158
+ return false;
159
+ }
160
+ if (doc_num <= self->doc) {
161
+ doc_num = self->doc + 1;
162
+ }
163
+ while (true) {
164
+ FrtScorer *top = (FrtScorer *)frt_pq_top(scorer_queue);
165
+ if (top->doc >= doc_num) {
166
+ return dssc_advance_after_current(self);
167
+ }
168
+ else if (top->skip_to(top, doc_num)) {
169
+ frt_pq_down(scorer_queue);
170
+ }
171
+ else {
172
+ frt_pq_pop(scorer_queue);
173
+ if (scorer_queue->size < dssc->min_num_matches) {
174
+ return false;
175
+ }
176
+ }
177
+ }
178
+ }
179
+
180
+ static FrtExplanation *dssc_explain(FrtScorer *self, int doc_num)
181
+ {
182
+ int i;
183
+ DisjunctionSumScorer *dssc = DSSc(self);
184
+ FrtScorer *sub_scorer;
185
+ FrtExplanation *e
186
+ = frt_expl_new(0.0, "At least %d of:", dssc->min_num_matches);
187
+ for (i = 0; i < dssc->ss_cnt; i++) {
188
+ sub_scorer = dssc->sub_scorers[i];
189
+ frt_expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
190
+ }
191
+ return e;
192
+ }
193
+
194
+ static void dssc_destroy(FrtScorer *self)
195
+ {
196
+ DisjunctionSumScorer *dssc = DSSc(self);
197
+ int i;
198
+ for (i = 0; i < dssc->ss_cnt; i++) {
199
+ dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
200
+ }
201
+ if (dssc->scorer_queue) {
202
+ frt_pq_destroy(dssc->scorer_queue);
203
+ }
204
+ frt_scorer_destroy_i(self);
205
+ }
206
+
207
+ static FrtScorer *disjunction_sum_scorer_new(FrtScorer **sub_scorers, int ss_cnt,
208
+ int min_num_matches)
209
+ {
210
+ FrtScorer *self = frt_scorer_new(DisjunctionSumScorer, NULL);
211
+ DSSc(self)->ss_cnt = ss_cnt;
212
+
213
+ /* The document number of the current match */
214
+ self->doc = -1;
215
+ DSSc(self)->cum_score = -1.0;
216
+
217
+ /* The number of subscorers that provide the current match. */
218
+ DSSc(self)->num_matches = -1;
219
+ DSSc(self)->coordinator = NULL;
220
+
221
+ #ifdef DEBUG
222
+ if (min_num_matches <= 0) {
223
+ FRT_RAISE(FRT_ARG_ERROR, "The min_num_matches value <%d> should not be less "
224
+ "than 0\n", min_num_matches);
225
+ }
226
+ if (ss_cnt <= 1) {
227
+ FRT_RAISE(FRT_ARG_ERROR, "There should be at least 2 sub_scorers in a "
228
+ "DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
229
+ }
230
+ #endif
231
+
232
+ DSSc(self)->min_num_matches = min_num_matches;
233
+ DSSc(self)->sub_scorers = sub_scorers;
234
+ DSSc(self)->scorer_queue = NULL;
235
+
236
+ self->score = &dssc_score;
237
+ self->next = &dssc_next;
238
+ self->skip_to = &dssc_skip_to;
239
+ self->explain = &dssc_explain;
240
+ self->destroy = &dssc_destroy;
241
+
242
+ return self;
243
+ }
244
+
245
+ static float cdssc_score(FrtScorer *self)
246
+ {
247
+ DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
248
+ return DSSc(self)->cum_score;
249
+ }
250
+
251
+ static FrtScorer *counting_disjunction_sum_scorer_new(
252
+ Coordinator *coordinator, FrtScorer **sub_scorers, int ss_cnt,
253
+ int min_num_matches)
254
+ {
255
+ FrtScorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
256
+ min_num_matches);
257
+ DSSc(self)->coordinator = coordinator;
258
+ self->score = &cdssc_score;
259
+ return self;
260
+ }
261
+
262
+ /***************************************************************************
263
+ * ConjunctionScorer
264
+ ***************************************************************************/
265
+
266
+ #define CSc(scorer) ((ConjunctionScorer *)(scorer))
267
+
268
+ typedef struct ConjunctionScorer
269
+ {
270
+ FrtScorer super;
271
+ bool first_time : 1;
272
+ bool more : 1;
273
+ float coord;
274
+ FrtScorer **sub_scorers;
275
+ int ss_cnt;
276
+ int first_idx;
277
+ Coordinator *coordinator;
278
+ int last_scored_doc;
279
+ } ConjunctionScorer;
280
+
281
+ static void csc_sort_scorers(ConjunctionScorer *csc)
282
+ {
283
+ int i;
284
+ FrtScorer *current = csc->sub_scorers[0], *previous;
285
+ for (i = 1; i < csc->ss_cnt; i++) {
286
+ previous = current;
287
+ current = csc->sub_scorers[i];
288
+ if (previous->doc > current->doc) {
289
+ if (!current->skip_to(current, previous->doc)) {
290
+ csc->more = false;
291
+ return;
292
+ }
293
+ }
294
+ }
295
+ /*qsort(csc->sub_scorers, csc->ss_cnt, sizeof(FrtScorer *), &frt_scorer_doc_cmp);*/
296
+ csc->first_idx = 0;
297
+ }
298
+
299
+ static void csc_init(FrtScorer *self, bool init_scorers)
300
+ {
301
+ ConjunctionScorer *csc = CSc(self);
302
+ const int sub_sc_cnt = csc->ss_cnt;
303
+
304
+ /* compute coord factor */
305
+ csc->coord = frt_sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
306
+
307
+ csc->more = (sub_sc_cnt > 0);
308
+
309
+ if (init_scorers) {
310
+ int i;
311
+ /* move each scorer to its first entry */
312
+ for (i = 0; i < sub_sc_cnt; i++) {
313
+ FrtScorer *sub_scorer = csc->sub_scorers[i];
314
+ if (!csc->more) {
315
+ break;
316
+ }
317
+ csc->more = sub_scorer->next(sub_scorer);
318
+ }
319
+ if (csc->more) {
320
+ csc_sort_scorers(csc);
321
+ }
322
+ }
323
+
324
+ csc->first_time = false;
325
+ }
326
+
327
+ static float csc_score(FrtScorer *self)
328
+ {
329
+ ConjunctionScorer *csc = CSc(self);
330
+ const int sub_sc_cnt = csc->ss_cnt;
331
+ float score = 0.0f; /* sum scores */
332
+ int i;
333
+ for (i = 0; i < sub_sc_cnt; i++) {
334
+ FrtScorer *sub_scorer = csc->sub_scorers[i];
335
+ score += sub_scorer->score(sub_scorer);
336
+ }
337
+ score *= csc->coord;
338
+ return score;
339
+ }
340
+
341
+ static bool csc_do_next(FrtScorer *self)
342
+ {
343
+ ConjunctionScorer *csc = CSc(self);
344
+ const int sub_sc_cnt = csc->ss_cnt;
345
+ int first_idx = csc->first_idx;
346
+ FrtScorer *first_sc = csc->sub_scorers[first_idx];
347
+ FrtScorer *last_sc = csc->sub_scorers[FRT_PREV_NUM(first_idx, sub_sc_cnt)];
348
+
349
+ /* skip to doc with all clauses */
350
+ while (csc->more && (first_sc->doc < last_sc->doc)) {
351
+ /* skip first upto last */
352
+ csc->more = first_sc->skip_to(first_sc, last_sc->doc);
353
+ /* move first to last */
354
+ last_sc = first_sc;
355
+ first_idx = FRT_NEXT_NUM(first_idx, sub_sc_cnt);
356
+ first_sc = csc->sub_scorers[first_idx];
357
+ }
358
+ self->doc = first_sc->doc;
359
+ csc->first_idx = first_idx;
360
+ return csc->more;
361
+ }
362
+
363
+ static bool csc_next(FrtScorer *self)
364
+ {
365
+ ConjunctionScorer *csc = CSc(self);
366
+ if (csc->first_time) {
367
+ csc_init(self, true);
368
+ }
369
+ else if (csc->more) {
370
+ /* trigger further scanning */
371
+ const int last_idx = FRT_PREV_NUM(csc->first_idx, csc->ss_cnt);
372
+ FrtScorer *sub_scorer = csc->sub_scorers[last_idx];
373
+ csc->more = sub_scorer->next(sub_scorer);
374
+ }
375
+ return csc_do_next(self);
376
+ }
377
+
378
+ static bool csc_skip_to(FrtScorer *self, int doc_num)
379
+ {
380
+ ConjunctionScorer *csc = CSc(self);
381
+ const int sub_sc_cnt = csc->ss_cnt;
382
+ int i;
383
+ bool more = csc->more;
384
+
385
+ if (csc->first_time) {
386
+ csc_init(self, false);
387
+ }
388
+
389
+ for (i = 0; i < sub_sc_cnt; i++) {
390
+ if (!more) {
391
+ break;
392
+ }
393
+ else {
394
+ FrtScorer *sub_scorer = csc->sub_scorers[i];
395
+ more = sub_scorer->skip_to(sub_scorer, doc_num);
396
+ }
397
+ }
398
+ if (more) {
399
+ /* resort the scorers */
400
+ csc_sort_scorers(csc);
401
+ }
402
+
403
+ csc->more = more;
404
+ return csc_do_next(self);
405
+ }
406
+
407
+ static void csc_destroy(FrtScorer *self)
408
+ {
409
+ ConjunctionScorer *csc = CSc(self);
410
+ const int sub_sc_cnt = csc->ss_cnt;
411
+ int i;
412
+ for (i = 0; i < sub_sc_cnt; i++) {
413
+ csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
414
+ }
415
+ free(csc->sub_scorers);
416
+ frt_scorer_destroy_i(self);
417
+ }
418
+
419
+ static FrtScorer *conjunction_scorer_new(FrtSimilarity *similarity)
420
+ {
421
+ FrtScorer *self = frt_scorer_new(ConjunctionScorer, similarity);
422
+
423
+ CSc(self)->first_time = true;
424
+ CSc(self)->more = true;
425
+ CSc(self)->coordinator = NULL;
426
+
427
+ self->score = &csc_score;
428
+ self->next = &csc_next;
429
+ self->skip_to = &csc_skip_to;
430
+ self->destroy = &csc_destroy;
431
+
432
+ return self;
433
+ }
434
+
435
+ static float ccsc_score(FrtScorer *self)
436
+ {
437
+ ConjunctionScorer *csc = CSc(self);
438
+
439
+ int doc;
440
+ if ((doc = self->doc) > csc->last_scored_doc) {
441
+ csc->last_scored_doc = doc;
442
+ csc->coordinator->num_matches += csc->ss_cnt;
443
+ }
444
+
445
+ return csc_score(self);
446
+ }
447
+
448
+ static FrtScorer *counting_conjunction_sum_scorer_new(
449
+ Coordinator *coordinator, FrtScorer **sub_scorers, int ss_cnt)
450
+ {
451
+ FrtScorer *self = conjunction_scorer_new(frt_sim_create_default());
452
+ ConjunctionScorer *csc = CSc(self);
453
+ csc->coordinator = coordinator;
454
+ csc->last_scored_doc = -1;
455
+ csc->sub_scorers = FRT_ALLOC_N(FrtScorer *, ss_cnt);
456
+ memcpy(csc->sub_scorers, sub_scorers, sizeof(FrtScorer *) * ss_cnt);
457
+ csc->ss_cnt = ss_cnt;
458
+
459
+ self->score = &ccsc_score;
460
+
461
+ return self;
462
+ }
463
+
464
+ /***************************************************************************
465
+ * SingleMatchScorer
466
+ ***************************************************************************/
467
+
468
+ #define SMSc(scorer) ((SingleMatchScorer *)(scorer))
469
+
470
+ typedef struct SingleMatchScorer
471
+ {
472
+ FrtScorer super;
473
+ Coordinator *coordinator;
474
+ FrtScorer *scorer;
475
+ } SingleMatchScorer;
476
+
477
+
478
+ static float smsc_score(FrtScorer *self)
479
+ {
480
+ SMSc(self)->coordinator->num_matches++;
481
+ return SMSc(self)->scorer->score(SMSc(self)->scorer);
482
+ }
483
+
484
+ static bool smsc_next(FrtScorer *self)
485
+ {
486
+ FrtScorer *scorer = SMSc(self)->scorer;
487
+ if (scorer->next(scorer)) {
488
+ self->doc = scorer->doc;
489
+ return true;
490
+ }
491
+ return false;
492
+ }
493
+
494
+ static bool smsc_skip_to(FrtScorer *self, int doc_num)
495
+ {
496
+ FrtScorer *scorer = SMSc(self)->scorer;
497
+ if (scorer->skip_to(scorer, doc_num)) {
498
+ self->doc = scorer->doc;
499
+ return true;
500
+ }
501
+ return false;
502
+ }
503
+
504
+ static FrtExplanation *smsc_explain(FrtScorer *self, int doc_num)
505
+ {
506
+ FrtScorer *scorer = SMSc(self)->scorer;
507
+ return scorer->explain(scorer, doc_num);
508
+ }
509
+
510
+ static void smsc_destroy(FrtScorer *self)
511
+ {
512
+ FrtScorer *scorer = SMSc(self)->scorer;
513
+ scorer->destroy(scorer);
514
+ frt_scorer_destroy_i(self);
515
+ }
516
+
517
+ static FrtScorer *single_match_scorer_new(Coordinator *coordinator,
518
+ FrtScorer *scorer)
519
+ {
520
+ FrtScorer *self = frt_scorer_new(SingleMatchScorer, scorer->similarity);
521
+ SMSc(self)->coordinator = coordinator;
522
+ SMSc(self)->scorer = scorer;
523
+
524
+ self->score = &smsc_score;
525
+ self->next = &smsc_next;
526
+ self->skip_to = &smsc_skip_to;
527
+ self->explain = &smsc_explain;
528
+ self->destroy = &smsc_destroy;
529
+ return self;
530
+ }
531
+
532
+ /***************************************************************************
533
+ * ReqOptSumScorer
534
+ ***************************************************************************/
535
+
536
+ #define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
537
+
538
+ typedef struct ReqOptSumScorer
539
+ {
540
+ FrtScorer super;
541
+ FrtScorer *req_scorer;
542
+ FrtScorer *opt_scorer;
543
+ bool first_time_opt;
544
+ } ReqOptSumScorer;
545
+
546
+ static float rossc_score(FrtScorer *self)
547
+ {
548
+ ReqOptSumScorer *rossc = ROSSc(self);
549
+ FrtScorer *req_scorer = rossc->req_scorer;
550
+ FrtScorer *opt_scorer = rossc->opt_scorer;
551
+ int cur_doc = req_scorer->doc;
552
+ float req_score = req_scorer->score(req_scorer);
553
+
554
+ if (rossc->first_time_opt) {
555
+ rossc->first_time_opt = false;
556
+ if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
557
+ FRT_SCORER_NULLIFY(rossc->opt_scorer);
558
+ return req_score;
559
+ }
560
+ }
561
+ else if (opt_scorer == NULL) {
562
+ return req_score;
563
+ }
564
+ else if ((opt_scorer->doc < cur_doc)
565
+ && ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
566
+ FRT_SCORER_NULLIFY(rossc->opt_scorer);
567
+ return req_score;
568
+ }
569
+ /* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
570
+ return (opt_scorer->doc == cur_doc)
571
+ ? req_score + opt_scorer->score(opt_scorer)
572
+ : req_score;
573
+ }
574
+
575
+ static bool rossc_next(FrtScorer *self)
576
+ {
577
+ FrtScorer *req_scorer = ROSSc(self)->req_scorer;
578
+ if (req_scorer->next(req_scorer)) {
579
+ self->doc = req_scorer->doc;
580
+ return true;
581
+ }
582
+ return false;
583
+ }
584
+
585
+ static bool rossc_skip_to(FrtScorer *self, int doc_num)
586
+ {
587
+ FrtScorer *req_scorer = ROSSc(self)->req_scorer;
588
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
589
+ self->doc = req_scorer->doc;
590
+ return true;
591
+ }
592
+ return false;
593
+ }
594
+
595
+ static FrtExplanation *rossc_explain(FrtScorer *self, int doc_num)
596
+ {
597
+ FrtScorer *req_scorer = ROSSc(self)->req_scorer;
598
+ FrtScorer *opt_scorer = ROSSc(self)->opt_scorer;
599
+
600
+ FrtExplanation *e = frt_expl_new(self->score(self),"required, optional:");
601
+ frt_expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
602
+ frt_expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
603
+ return e;
604
+ }
605
+
606
+ static void rossc_destroy(FrtScorer *self)
607
+ {
608
+ ReqOptSumScorer *rossc = ROSSc(self);
609
+ if (rossc->req_scorer) {
610
+ rossc->req_scorer->destroy(rossc->req_scorer);
611
+ }
612
+ if (rossc->opt_scorer) {
613
+ rossc->opt_scorer->destroy(rossc->opt_scorer);
614
+ }
615
+ frt_scorer_destroy_i(self);
616
+ }
617
+
618
+
619
+ static FrtScorer *req_opt_sum_scorer_new(FrtScorer *req_scorer, FrtScorer *opt_scorer)
620
+ {
621
+ FrtScorer *self = frt_scorer_new(ReqOptSumScorer, NULL);
622
+
623
+ ROSSc(self)->req_scorer = req_scorer;
624
+ ROSSc(self)->opt_scorer = opt_scorer;
625
+ ROSSc(self)->first_time_opt = true;
626
+
627
+ self->score = &rossc_score;
628
+ self->next = &rossc_next;
629
+ self->skip_to = &rossc_skip_to;
630
+ self->explain = &rossc_explain;
631
+ self->destroy = &rossc_destroy;
632
+
633
+ return self;
634
+ }
635
+
636
+ /***************************************************************************
637
+ * ReqExclScorer
638
+ ***************************************************************************/
639
+
640
+ #define RXSc(scorer) ((ReqExclScorer *)(scorer))
641
+ typedef struct ReqExclScorer
642
+ {
643
+ FrtScorer super;
644
+ FrtScorer *req_scorer;
645
+ FrtScorer *excl_scorer;
646
+ bool first_time;
647
+ } ReqExclScorer;
648
+
649
+ static bool rxsc_to_non_excluded(FrtScorer *self)
650
+ {
651
+ FrtScorer *req_scorer = RXSc(self)->req_scorer;
652
+ FrtScorer *excl_scorer = RXSc(self)->excl_scorer;
653
+ int excl_doc = excl_scorer->doc, req_doc;
654
+
655
+ do {
656
+ /* may be excluded */
657
+ req_doc = req_scorer->doc;
658
+ if (req_doc < excl_doc) {
659
+ /* req_scorer advanced to before excl_scorer, ie. not excluded */
660
+ self->doc = req_doc;
661
+ return true;
662
+ }
663
+ else if (req_doc > excl_doc) {
664
+ if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
665
+ /* emptied, no more exclusions */
666
+ FRT_SCORER_NULLIFY(RXSc(self)->excl_scorer);
667
+ self->doc = req_doc;
668
+ return true;
669
+ }
670
+ excl_doc = excl_scorer->doc;
671
+ if (excl_doc > req_doc) {
672
+ self->doc = req_doc;
673
+ return true; /* not excluded */
674
+ }
675
+ }
676
+ } while (req_scorer->next(req_scorer));
677
+ /* emptied, nothing left */
678
+ FRT_SCORER_NULLIFY(RXSc(self)->req_scorer);
679
+ return false;
680
+ }
681
+
682
+ static bool rxsc_next(FrtScorer *self)
683
+ {
684
+ ReqExclScorer *rxsc = RXSc(self);
685
+ FrtScorer *req_scorer = rxsc->req_scorer;
686
+ FrtScorer *excl_scorer = rxsc->excl_scorer;
687
+
688
+ if (rxsc->first_time) {
689
+ if (! excl_scorer->next(excl_scorer)) {
690
+ /* emptied at start */
691
+ FRT_SCORER_NULLIFY(rxsc->excl_scorer);
692
+ excl_scorer = NULL;
693
+ }
694
+ rxsc->first_time = false;
695
+ }
696
+ if (req_scorer == NULL) {
697
+ return false;
698
+ }
699
+ if (! req_scorer->next(req_scorer)) {
700
+ /* emptied, nothing left */
701
+ FRT_SCORER_NULLIFY(rxsc->req_scorer);
702
+ return false;
703
+ }
704
+ if (excl_scorer == NULL) {
705
+ self->doc = req_scorer->doc;
706
+ /* req_scorer->next() already returned true */
707
+ return true;
708
+ }
709
+ return rxsc_to_non_excluded(self);
710
+ }
711
+
712
+ static bool rxsc_skip_to(FrtScorer *self, int doc_num)
713
+ {
714
+ ReqExclScorer *rxsc = RXSc(self);
715
+ FrtScorer *req_scorer = rxsc->req_scorer;
716
+ FrtScorer *excl_scorer = rxsc->excl_scorer;
717
+
718
+ if (rxsc->first_time) {
719
+ rxsc->first_time = false;
720
+ if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
721
+ /* emptied */
722
+ FRT_SCORER_NULLIFY(rxsc->excl_scorer);
723
+ excl_scorer = NULL;
724
+ }
725
+ }
726
+ if (req_scorer == NULL) {
727
+ return false;
728
+ }
729
+ if (excl_scorer == NULL) {
730
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
731
+ self->doc = req_scorer->doc;
732
+ return true;
733
+ }
734
+ return false;
735
+ }
736
+ if (! req_scorer->skip_to(req_scorer, doc_num)) {
737
+ FRT_SCORER_NULLIFY(rxsc->req_scorer);
738
+ return false;
739
+ }
740
+ return rxsc_to_non_excluded(self);
741
+ }
742
+
743
+ static float rxsc_score(FrtScorer *self)
744
+ {
745
+ FrtScorer *req_scorer = RXSc(self)->req_scorer;
746
+ return req_scorer->score(req_scorer);
747
+ }
748
+
749
+ static FrtExplanation *rxsc_explain(FrtScorer *self, int doc_num)
750
+ {
751
+ ReqExclScorer *rxsc = RXSc(self);
752
+ FrtScorer *req_scorer = rxsc->req_scorer;
753
+ FrtScorer *excl_scorer = rxsc->excl_scorer;
754
+ FrtExplanation *e;
755
+
756
+ if (excl_scorer->skip_to(excl_scorer, doc_num)
757
+ && excl_scorer->doc == doc_num) {
758
+ e = frt_expl_new(0.0, "excluded:");
759
+ }
760
+ else {
761
+ e = frt_expl_new(0.0, "not excluded:");
762
+ frt_expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
763
+ }
764
+ return e;
765
+ }
766
+
767
+ static void rxsc_destroy(FrtScorer *self)
768
+ {
769
+ ReqExclScorer *rxsc = RXSc(self);
770
+ if (rxsc->req_scorer) {
771
+ rxsc->req_scorer->destroy(rxsc->req_scorer);
772
+ }
773
+ if (rxsc->excl_scorer) {
774
+ rxsc->excl_scorer->destroy(rxsc->excl_scorer);
775
+ }
776
+ frt_scorer_destroy_i(self);
777
+ }
778
+
779
+ static FrtScorer *req_excl_scorer_new(FrtScorer *req_scorer, FrtScorer *excl_scorer)
780
+ {
781
+ FrtScorer *self = frt_scorer_new(ReqExclScorer, NULL);
782
+ RXSc(self)->req_scorer = req_scorer;
783
+ RXSc(self)->excl_scorer = excl_scorer;
784
+ RXSc(self)->first_time = true;
785
+
786
+ self->score = &rxsc_score;
787
+ self->next = &rxsc_next;
788
+ self->skip_to = &rxsc_skip_to;
789
+ self->explain = &rxsc_explain;
790
+ self->destroy = &rxsc_destroy;
791
+
792
+ return self;
793
+ }
794
+
795
+ /***************************************************************************
796
+ * NonMatchScorer
797
+ ***************************************************************************/
798
+
799
+ static float nmsc_score(FrtScorer *self)
800
+ {
801
+ (void)self;
802
+ return 0.0;
803
+ }
804
+
805
+ static bool nmsc_next(FrtScorer *self)
806
+ {
807
+ (void)self;
808
+ return false;
809
+ }
810
+
811
+ static bool nmsc_skip_to(FrtScorer *self, int doc_num)
812
+ {
813
+ (void)self; (void)doc_num;
814
+ return false;
815
+ }
816
+
817
+ static FrtExplanation *nmsc_explain(FrtScorer *self, int doc_num)
818
+ {
819
+ (void)self; (void)doc_num;
820
+ return frt_expl_new(0.0, "No documents matched");
821
+ }
822
+
823
+ static FrtScorer *non_matching_scorer_new()
824
+ {
825
+ FrtScorer *self = frt_scorer_new(FrtScorer, NULL);
826
+ self->score = &nmsc_score;
827
+ self->next = &nmsc_next;
828
+ self->skip_to = &nmsc_skip_to;
829
+ self->explain = &nmsc_explain;
830
+
831
+ return self;
832
+ }
833
+
834
+ /***************************************************************************
835
+ * BooleanScorer
836
+ ***************************************************************************/
837
+
838
+ #define BSc(scorer) ((BooleanScorer *)(scorer))
839
+ typedef struct BooleanScorer
840
+ {
841
+ FrtScorer super;
842
+ FrtScorer **required_scorers;
843
+ int rs_cnt;
844
+ int rs_capa;
845
+ FrtScorer **optional_scorers;
846
+ int os_cnt;
847
+ int os_capa;
848
+ FrtScorer **prohibited_scorers;
849
+ int ps_cnt;
850
+ int ps_capa;
851
+ FrtScorer *counting_sum_scorer;
852
+ Coordinator *coordinator;
853
+ } BooleanScorer;
854
+
855
+ static FrtScorer *counting_sum_scorer_create3(BooleanScorer *bsc,
856
+ FrtScorer *req_scorer,
857
+ FrtScorer *opt_scorer)
858
+ {
859
+ if (bsc->ps_cnt == 0) {
860
+ /* no prohibited */
861
+ return req_opt_sum_scorer_new(req_scorer, opt_scorer);
862
+ }
863
+ else if (bsc->ps_cnt == 1) {
864
+ /* 1 prohibited */
865
+ return req_opt_sum_scorer_new(
866
+ req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
867
+ opt_scorer);
868
+ }
869
+ else {
870
+ /* more prohibited */
871
+ return req_opt_sum_scorer_new(
872
+ req_excl_scorer_new(
873
+ req_scorer,
874
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
875
+ bsc->ps_cnt, 1)),
876
+ opt_scorer);
877
+ }
878
+ }
879
+
880
+ static FrtScorer *counting_sum_scorer_create2(BooleanScorer *bsc,
881
+ FrtScorer *req_scorer,
882
+ FrtScorer **optional_scorers,
883
+ int os_cnt)
884
+ {
885
+ if (os_cnt == 0) {
886
+ if (bsc->ps_cnt == 0) {
887
+ return req_scorer;
888
+ }
889
+ else if (bsc->ps_cnt == 1) {
890
+ return req_excl_scorer_new(req_scorer,
891
+ bsc->prohibited_scorers[0]);
892
+ }
893
+ else {
894
+ /* no optional, more than 1 prohibited */
895
+ return req_excl_scorer_new(
896
+ req_scorer,
897
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
898
+ bsc->ps_cnt, 1));
899
+ }
900
+ }
901
+ else if (os_cnt == 1) {
902
+ return counting_sum_scorer_create3(
903
+ bsc,
904
+ req_scorer,
905
+ single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
906
+ }
907
+ else {
908
+ /* more optional */
909
+ return counting_sum_scorer_create3(
910
+ bsc,
911
+ req_scorer,
912
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
913
+ optional_scorers, os_cnt, 1));
914
+ }
915
+ }
916
+
917
+ static FrtScorer *counting_sum_scorer_create(BooleanScorer *bsc)
918
+ {
919
+ if (bsc->rs_cnt == 0) {
920
+ if (bsc->os_cnt == 0) {
921
+ int i;
922
+ /* only prohibited scorers so return non_matching scorer */
923
+ for (i = 0; i < bsc->ps_cnt; i++) {
924
+ bsc->prohibited_scorers[i]->destroy(
925
+ bsc->prohibited_scorers[i]);
926
+ }
927
+ return non_matching_scorer_new();
928
+ }
929
+ else if (bsc->os_cnt == 1) {
930
+ /* the only optional scorer is required */
931
+ return counting_sum_scorer_create2(
932
+ bsc,
933
+ single_match_scorer_new(bsc->coordinator,
934
+ bsc->optional_scorers[0]),
935
+ NULL, 0); /* no optional scorers left */
936
+ }
937
+ else {
938
+ /* more than 1 optional_scorers, no required scorers */
939
+ return counting_sum_scorer_create2(
940
+ bsc,
941
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
942
+ bsc->optional_scorers,
943
+ bsc->os_cnt, 1),
944
+ NULL, 0); /* no optional scorers left */
945
+ }
946
+ }
947
+ else if (bsc->rs_cnt == 1) {
948
+ /* 1 required */
949
+ return counting_sum_scorer_create2(
950
+ bsc,
951
+ single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
952
+ bsc->optional_scorers, bsc->os_cnt);
953
+ }
954
+ else {
955
+ /* more required scorers */
956
+ return counting_sum_scorer_create2(
957
+ bsc,
958
+ counting_conjunction_sum_scorer_new(bsc->coordinator,
959
+ bsc->required_scorers,
960
+ bsc->rs_cnt),
961
+ bsc->optional_scorers, bsc->os_cnt);
962
+ }
963
+ }
964
+
965
+ static FrtScorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
966
+ {
967
+ coord_init(bsc->coordinator);
968
+ return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
969
+ }
970
+
971
+ static void bsc_add_scorer(FrtScorer *self, FrtScorer *scorer, unsigned int occur)
972
+ {
973
+ BooleanScorer *bsc = BSc(self);
974
+ if (occur != FRT_BC_MUST_NOT) {
975
+ bsc->coordinator->max_coord++;
976
+ }
977
+
978
+ switch (occur) {
979
+ case FRT_BC_MUST:
980
+ FRT_RECAPA(bsc, rs_cnt, rs_capa, required_scorers, FrtScorer *);
981
+ bsc->required_scorers[bsc->rs_cnt++] = scorer;
982
+ break;
983
+ case FRT_BC_SHOULD:
984
+ FRT_RECAPA(bsc, os_cnt, os_capa, optional_scorers, FrtScorer *);
985
+ bsc->optional_scorers[bsc->os_cnt++] = scorer;
986
+ break;
987
+ case FRT_BC_MUST_NOT:
988
+ FRT_RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, FrtScorer *);
989
+ bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
990
+ break;
991
+ default:
992
+ FRT_RAISE(FRT_ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
993
+ ":must_not instead");
994
+ }
995
+ }
996
+
997
+ static float bsc_score(FrtScorer *self)
998
+ {
999
+ BooleanScorer *bsc = BSc(self);
1000
+ Coordinator *coord = bsc->coordinator;
1001
+ float sum;
1002
+ coord->num_matches = 0;
1003
+ sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
1004
+ return sum * coord->coord_factors[coord->num_matches];
1005
+ }
1006
+
1007
+ static bool bsc_next(FrtScorer *self)
1008
+ {
1009
+ FrtScorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1010
+
1011
+ if (!cnt_sum_sc) {
1012
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1013
+ }
1014
+ if (cnt_sum_sc->next(cnt_sum_sc)) {
1015
+ self->doc = cnt_sum_sc->doc;
1016
+ return true;
1017
+ }
1018
+ else {
1019
+ return false;
1020
+ }
1021
+ }
1022
+
1023
+ static bool bsc_skip_to(FrtScorer *self, int doc_num)
1024
+ {
1025
+ FrtScorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1026
+
1027
+ if (!BSc(self)->counting_sum_scorer) {
1028
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1029
+ }
1030
+ if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
1031
+ self->doc = cnt_sum_sc->doc;
1032
+ return true;
1033
+ }
1034
+ else {
1035
+ return false;
1036
+ }
1037
+ }
1038
+
1039
+ static void bsc_destroy(FrtScorer *self)
1040
+ {
1041
+ BooleanScorer *bsc = BSc(self);
1042
+ Coordinator *coord = bsc->coordinator;
1043
+
1044
+ free(coord->coord_factors);
1045
+ free(coord);
1046
+
1047
+ if (bsc->counting_sum_scorer) {
1048
+ bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1049
+ }
1050
+ else {
1051
+ int i;
1052
+ for (i = 0; i < bsc->rs_cnt; i++) {
1053
+ bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1054
+ }
1055
+
1056
+ for (i = 0; i < bsc->os_cnt; i++) {
1057
+ bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1058
+ }
1059
+
1060
+ for (i = 0; i < bsc->ps_cnt; i++) {
1061
+ bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1062
+ }
1063
+ }
1064
+ free(bsc->required_scorers);
1065
+ free(bsc->optional_scorers);
1066
+ free(bsc->prohibited_scorers);
1067
+ frt_scorer_destroy_i(self);
1068
+ }
1069
+
1070
+ static FrtExplanation *bsc_explain(FrtScorer *self, int doc_num)
1071
+ {
1072
+ (void)self; (void)doc_num;
1073
+ return frt_expl_new(0.0, "This explanation is not supported");
1074
+ }
1075
+
1076
+ static FrtScorer *bsc_new(FrtSimilarity *similarity)
1077
+ {
1078
+ FrtScorer *self = frt_scorer_new(BooleanScorer, similarity);
1079
+ BSc(self)->coordinator = coord_new(similarity);
1080
+ BSc(self)->counting_sum_scorer = NULL;
1081
+
1082
+ self->score = &bsc_score;
1083
+ self->next = &bsc_next;
1084
+ self->skip_to = &bsc_skip_to;
1085
+ self->explain = &bsc_explain;
1086
+ self->destroy = &bsc_destroy;
1087
+ return self;
1088
+ }
1089
+
1090
+ /***************************************************************************
1091
+ *
1092
+ * BooleanWeight
1093
+ *
1094
+ ***************************************************************************/
1095
+
1096
+ typedef struct BooleanWeight
1097
+ {
1098
+ FrtWeight w;
1099
+ FrtWeight **weights;
1100
+ int w_cnt;
1101
+ } BooleanWeight;
1102
+
1103
+
1104
+ static float bw_sum_of_squared_weights(FrtWeight *self)
1105
+ {
1106
+ FrtBooleanQuery *bq = BQ(self->query);
1107
+ float sum = 0.0f;
1108
+ int i;
1109
+
1110
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1111
+ if (! bq->clauses[i]->is_prohibited) {
1112
+ FrtWeight *weight = BW(self)->weights[i];
1113
+ /* sum sub-weights */
1114
+ sum += weight->sum_of_squared_weights(weight);
1115
+ }
1116
+ }
1117
+
1118
+ /* boost each sub-weight */
1119
+ sum *= self->value * self->value;
1120
+ return sum;
1121
+ }
1122
+
1123
+ static void bw_normalize(FrtWeight *self, float normalization_factor)
1124
+ {
1125
+ FrtBooleanQuery *bq = BQ(self->query);
1126
+ int i;
1127
+
1128
+ normalization_factor *= self->value; /* multiply by query boost */
1129
+
1130
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1131
+ if (! bq->clauses[i]->is_prohibited) {
1132
+ FrtWeight *weight = BW(self)->weights[i];
1133
+ /* sum sub-weights */
1134
+ weight->normalize(weight, normalization_factor);
1135
+ }
1136
+ }
1137
+ }
1138
+
1139
+ static FrtScorer *bw_scorer(FrtWeight *self, FrtIndexReader *ir)
1140
+ {
1141
+ FrtScorer *bsc = bsc_new(self->similarity);
1142
+ FrtBooleanQuery *bq = BQ(self->query);
1143
+ int i;
1144
+
1145
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1146
+ FrtBooleanClause *clause = bq->clauses[i];
1147
+ FrtWeight *weight = BW(self)->weights[i];
1148
+ FrtScorer *sub_scorer = weight->scorer(weight, ir);
1149
+ if (sub_scorer) {
1150
+ bsc_add_scorer(bsc, sub_scorer, clause->occur);
1151
+ }
1152
+ else if (clause->is_required) {
1153
+ bsc->destroy(bsc);
1154
+ return NULL;
1155
+ }
1156
+ }
1157
+
1158
+ return bsc;
1159
+ }
1160
+
1161
+ static char *bw_to_s(FrtWeight *self)
1162
+ {
1163
+ return frt_strfmt("BooleanWeight(%f)", self->value);
1164
+ }
1165
+
1166
+ static void bw_destroy(FrtWeight *self)
1167
+ {
1168
+ int i;
1169
+
1170
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1171
+ BW(self)->weights[i]->destroy(BW(self)->weights[i]);
1172
+ }
1173
+
1174
+ free(BW(self)->weights);
1175
+ frt_w_destroy(self);
1176
+ }
1177
+
1178
+ static FrtExplanation *bw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num)
1179
+ {
1180
+ FrtBooleanQuery *bq = BQ(self->query);
1181
+ FrtExplanation *sum_expl = frt_expl_new(0.0f, "sum of:");
1182
+ FrtExplanation *explanation;
1183
+ int coord = 0;
1184
+ int max_coord = 0;
1185
+ float coord_factor = 0.0f;
1186
+ float sum = 0.0f;
1187
+ int i;
1188
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1189
+ FrtWeight *weight = BW(self)->weights[i];
1190
+ FrtBooleanClause *clause = bq->clauses[i];
1191
+ explanation = weight->explain(weight, ir, doc_num);
1192
+ if (!clause->is_prohibited) {
1193
+ max_coord++;
1194
+ }
1195
+ if (explanation->value > 0.0f) {
1196
+ if (!clause->is_prohibited) {
1197
+ frt_expl_add_detail(sum_expl, explanation);
1198
+ sum += explanation->value;
1199
+ coord++;
1200
+ } else {
1201
+ frt_expl_destroy(explanation);
1202
+ frt_expl_destroy(sum_expl);
1203
+ return frt_expl_new(0.0, "match prohibited");
1204
+ }
1205
+ } else if (clause->is_required) {
1206
+ frt_expl_destroy(explanation);
1207
+ frt_expl_destroy(sum_expl);
1208
+ return frt_expl_new(0.0, "match required");
1209
+ } else {
1210
+ frt_expl_destroy(explanation);
1211
+ }
1212
+ }
1213
+ sum_expl->value = sum;
1214
+ if (coord == 1) { /* only one clause matched */
1215
+ explanation = sum_expl; /* eliminate wrapper */
1216
+ frt_ary_size(sum_expl->details) = 0;
1217
+ sum_expl = sum_expl->details[0];
1218
+ frt_expl_destroy(explanation);
1219
+ }
1220
+ coord_factor = frt_sim_coord(self->similarity, coord, max_coord);
1221
+ if (coord_factor == 1.0) { /* coord is no-op */
1222
+ return sum_expl; /* eliminate wrapper */
1223
+ } else {
1224
+ explanation = frt_expl_new(sum * coord_factor, "product of:");
1225
+ frt_expl_add_detail(explanation, sum_expl);
1226
+ frt_expl_add_detail(explanation, frt_expl_new(coord_factor, "coord(%d/%d)",
1227
+ coord, max_coord));
1228
+ return explanation;
1229
+ }
1230
+ }
1231
+
1232
+ static FrtWeight *bw_new(FrtQuery *query, FrtSearcher *searcher)
1233
+ {
1234
+ int i;
1235
+ FrtWeight *self = w_new(BooleanWeight, query);
1236
+
1237
+ BW(self)->w_cnt = BQ(query)->clause_cnt;
1238
+ BW(self)->weights = FRT_ALLOC_N(FrtWeight *, BW(self)->w_cnt);
1239
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1240
+ BW(self)->weights[i] = frt_q_weight(BQ(query)->clauses[i]->query, searcher);
1241
+ }
1242
+
1243
+ self->normalize = &bw_normalize;
1244
+ self->scorer = &bw_scorer;
1245
+ self->explain = &bw_explain;
1246
+ self->to_s = &bw_to_s;
1247
+ self->destroy = &bw_destroy;
1248
+ self->sum_of_squared_weights = &bw_sum_of_squared_weights;
1249
+
1250
+ self->similarity = query->get_similarity(query, searcher);
1251
+ self->value = query->boost;
1252
+
1253
+ return self;
1254
+ }
1255
+
1256
+ /***************************************************************************
1257
+ *
1258
+ * BooleanClause
1259
+ *
1260
+ ***************************************************************************/
1261
+
1262
+ void frt_bc_set_occur(FrtBooleanClause *self, FrtBCType occur)
1263
+ {
1264
+ self->occur = occur;
1265
+ switch (occur) {
1266
+ case FRT_BC_SHOULD:
1267
+ self->is_prohibited = false;
1268
+ self->is_required = false;
1269
+ break;
1270
+ case FRT_BC_MUST:
1271
+ self->is_prohibited = false;
1272
+ self->is_required = true;
1273
+ break;
1274
+ case FRT_BC_MUST_NOT:
1275
+ self->is_prohibited = true;
1276
+ self->is_required = false;
1277
+ break;
1278
+ default:
1279
+ FRT_RAISE(FRT_ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
1280
+ ":must or :must_not instead");
1281
+ }
1282
+ }
1283
+
1284
+ void frt_bc_deref(FrtBooleanClause *self)
1285
+ {
1286
+ if (--self->ref_cnt <= 0) {
1287
+ frt_q_deref(self->query);
1288
+ free(self);
1289
+ }
1290
+ }
1291
+
1292
+ static unsigned long long bc_hash(FrtBooleanClause *self)
1293
+ {
1294
+ return ((frt_q_hash(self->query) << 2) | self->occur);
1295
+ }
1296
+
1297
+ static int bc_eq(FrtBooleanClause *self, FrtBooleanClause *o)
1298
+ {
1299
+ return ((self->occur == o->occur) && frt_q_eq(self->query, o->query));
1300
+ }
1301
+
1302
+ FrtBooleanClause *frt_bc_new(FrtQuery *query, FrtBCType occur)
1303
+ {
1304
+ FrtBooleanClause *self = FRT_ALLOC(FrtBooleanClause);
1305
+ self->ref_cnt = 1;
1306
+ self->query = query;
1307
+ frt_bc_set_occur(self, occur);
1308
+ return self;
1309
+ }
1310
+
1311
+ /***************************************************************************
1312
+ *
1313
+ * BooleanQuery
1314
+ *
1315
+ ***************************************************************************/
1316
+
1317
+ static FrtMatchVector *bq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
1318
+ FrtTermVector *tv)
1319
+ {
1320
+ int i;
1321
+ for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
1322
+ if (BQ(self)->clauses[i]->occur != FRT_BC_MUST_NOT) {
1323
+ FrtQuery *q = BQ(self)->clauses[i]->query;
1324
+ q->get_matchv_i(q, mv, tv);
1325
+ }
1326
+ }
1327
+ return mv;
1328
+ }
1329
+
1330
+ static FrtQuery *bq_rewrite(FrtQuery *self, FrtIndexReader *ir)
1331
+ {
1332
+ int i;
1333
+ const int clause_cnt = BQ(self)->clause_cnt;
1334
+ bool rewritten = false;
1335
+ bool has_non_prohibited_clause = false;
1336
+
1337
+ if (clause_cnt == 1) {
1338
+ /* optimize 1-clause queries */
1339
+ FrtBooleanClause *clause = BQ(self)->clauses[0];
1340
+ if (! clause->is_prohibited) {
1341
+ /* just return clause. Re-write first. */
1342
+ FrtQuery *q = clause->query->rewrite(clause->query, ir);
1343
+
1344
+ if (self->boost != 1.0) {
1345
+ /* original_boost is initialized to 0.0. If it has been set to
1346
+ * something else it means this query has already been boosted
1347
+ * before so boost from the original value */
1348
+ if ((q == clause->query) && BQ(self)->original_boost) {
1349
+ /* rewrite was no-op */
1350
+ q->boost = BQ(self)->original_boost * self->boost;
1351
+ }
1352
+ else {
1353
+ /* save original boost in case query is rewritten again */
1354
+ BQ(self)->original_boost = q->boost;
1355
+ q->boost *= self->boost;
1356
+ }
1357
+ }
1358
+
1359
+ return q;
1360
+ }
1361
+ }
1362
+
1363
+ self->ref_cnt++;
1364
+ /* replace each clause's query with its rewritten query */
1365
+ for (i = 0; i < clause_cnt; i++) {
1366
+ FrtBooleanClause *clause = BQ(self)->clauses[i];
1367
+ FrtQuery *rq = clause->query->rewrite(clause->query, ir);
1368
+ /* check for at least one non-prohibited clause */
1369
+ if (clause->is_prohibited == false) has_non_prohibited_clause = true;
1370
+ if (rq != clause->query) {
1371
+ if (!rewritten) {
1372
+ int j;
1373
+ FrtQuery *new_self = frt_q_new(FrtBooleanQuery);
1374
+ memcpy(new_self, self, sizeof(FrtBooleanQuery));
1375
+ BQ(new_self)->clauses = FRT_ALLOC_N(FrtBooleanClause *,
1376
+ BQ(self)->clause_capa);
1377
+ memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
1378
+ BQ(self)->clause_capa * sizeof(FrtBooleanClause *));
1379
+ for (j = 0; j < clause_cnt; j++) {
1380
+ FRT_REF(BQ(self)->clauses[j]);
1381
+ }
1382
+ self->ref_cnt--;
1383
+ self = new_self;
1384
+ self->ref_cnt = 1;
1385
+ rewritten = true;
1386
+ }
1387
+ FRT_DEREF(clause);
1388
+ BQ(self)->clauses[i] = frt_bc_new(rq, clause->occur);
1389
+ } else {
1390
+ FRT_DEREF(rq);
1391
+ }
1392
+ }
1393
+ if (clause_cnt > 0 && !has_non_prohibited_clause) {
1394
+ frt_bq_add_query_nr(self, frt_maq_new(), FRT_BC_MUST);
1395
+ }
1396
+
1397
+ return self;
1398
+ }
1399
+
1400
+ static void bq_extract_terms(FrtQuery *self, FrtHashSet *terms)
1401
+ {
1402
+ int i;
1403
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1404
+ FrtBooleanClause *clause = BQ(self)->clauses[i];
1405
+ clause->query->extract_terms(clause->query, terms);
1406
+ }
1407
+ }
1408
+
1409
+ static char *bq_to_s(FrtQuery *self, FrtSymbol field)
1410
+ {
1411
+ int i;
1412
+ FrtBooleanClause *clause;
1413
+ FrtQuery *sub_query;
1414
+ char *buffer;
1415
+ char *clause_str;
1416
+ int bp = 0;
1417
+ int size = FRT_QUERY_STRING_START_SIZE;
1418
+ int needed;
1419
+ int clause_len;
1420
+
1421
+ buffer = FRT_ALLOC_N(char, size);
1422
+ if (self->boost != 1.0) {
1423
+ buffer[0] = '(';
1424
+ bp++;
1425
+ }
1426
+
1427
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1428
+ clause = BQ(self)->clauses[i];
1429
+ clause_str = clause->query->to_s(clause->query, field);
1430
+ clause_len = (int)strlen(clause_str);
1431
+ needed = clause_len + 5;
1432
+ while ((size - bp) < needed) {
1433
+ size *= 2;
1434
+ FRT_REALLOC_N(buffer, char, size);
1435
+ }
1436
+
1437
+ if (i > 0) {
1438
+ buffer[bp++] = ' ';
1439
+ }
1440
+ if (clause->is_prohibited) {
1441
+ buffer[bp++] = '-';
1442
+ }
1443
+ else if (clause->is_required) {
1444
+ buffer[bp++] = '+';
1445
+ }
1446
+
1447
+ sub_query = clause->query;
1448
+ if (sub_query->type == BOOLEAN_QUERY) {
1449
+ /* wrap sub-bools in parens */
1450
+ buffer[bp++] = '(';
1451
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1452
+ bp += clause_len;
1453
+ buffer[bp++] = ')';
1454
+ }
1455
+ else {
1456
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1457
+ bp += clause_len;
1458
+ }
1459
+ free(clause_str);
1460
+ }
1461
+
1462
+ if (self->boost != 1.0) {
1463
+ char *boost_str = frt_strfmt(")^%f", self->boost);
1464
+ int boost_len = (int)strlen(boost_str);
1465
+ FRT_REALLOC_N(buffer, char, bp + boost_len + 1);
1466
+ memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
1467
+ bp += boost_len;
1468
+ free(boost_str);
1469
+ }
1470
+ buffer[bp] = 0;
1471
+ return buffer;
1472
+ }
1473
+
1474
+ static void bq_destroy(FrtQuery *self)
1475
+ {
1476
+ int i;
1477
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1478
+ frt_bc_deref(BQ(self)->clauses[i]);
1479
+ }
1480
+ free(BQ(self)->clauses);
1481
+ if (BQ(self)->similarity) {
1482
+ BQ(self)->similarity->destroy(BQ(self)->similarity);
1483
+ }
1484
+ frt_q_destroy_i(self);
1485
+ }
1486
+
1487
+ static float bq_coord_disabled(FrtSimilarity *sim, int overlap, int max_overlap)
1488
+ {
1489
+ (void)sim; (void)overlap; (void)max_overlap;
1490
+ return 1.0;
1491
+ }
1492
+
1493
+ static FrtSimilarity *bq_get_similarity(FrtQuery *self, FrtSearcher *searcher)
1494
+ {
1495
+ if (!BQ(self)->similarity) {
1496
+ FrtSimilarity *sim = frt_q_get_similarity_i(self, searcher);
1497
+ BQ(self)->similarity = FRT_ALLOC(FrtSimilarity);
1498
+ memcpy(BQ(self)->similarity, sim, sizeof(FrtSimilarity));
1499
+ BQ(self)->similarity->coord = &bq_coord_disabled;
1500
+ BQ(self)->similarity->destroy = (void (*)(FrtSimilarity *))&free;
1501
+ }
1502
+
1503
+ return BQ(self)->similarity;
1504
+ }
1505
+
1506
+ static unsigned long long bq_hash(FrtQuery *self)
1507
+ {
1508
+ int i;
1509
+ unsigned long long hash = 0;
1510
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1511
+ hash ^= bc_hash(BQ(self)->clauses[i]);
1512
+ }
1513
+ return (hash << 1) | BQ(self)->coord_disabled;
1514
+ }
1515
+
1516
+ static int bq_eq(FrtQuery *self, FrtQuery *o)
1517
+ {
1518
+ int i;
1519
+ FrtBooleanQuery *bq1 = BQ(self);
1520
+ FrtBooleanQuery *bq2 = BQ(o);
1521
+ if ((bq1->coord_disabled != bq2->coord_disabled)
1522
+ || (bq1->max_clause_cnt != bq2->max_clause_cnt)
1523
+ || (bq1->clause_cnt != bq2->clause_cnt)) {
1524
+ return false;
1525
+ }
1526
+
1527
+ for (i = 0; i < bq1->clause_cnt; i++) {
1528
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
1529
+ return false;
1530
+ }
1531
+ }
1532
+ return true;
1533
+ }
1534
+
1535
+ FrtQuery *frt_bq_new(bool coord_disabled)
1536
+ {
1537
+ FrtQuery *self = frt_q_new(FrtBooleanQuery);
1538
+ BQ(self)->coord_disabled = coord_disabled;
1539
+ if (coord_disabled) {
1540
+ self->get_similarity = &bq_get_similarity;
1541
+ }
1542
+ BQ(self)->max_clause_cnt = FRT_DEFAULT_MAX_CLAUSE_COUNT;
1543
+ BQ(self)->clause_cnt = 0;
1544
+ BQ(self)->clause_capa = FRT_BOOLEAN_CLAUSES_START_CAPA;
1545
+ BQ(self)->clauses = FRT_ALLOC_N(FrtBooleanClause *, FRT_BOOLEAN_CLAUSES_START_CAPA);
1546
+ BQ(self)->similarity = NULL;
1547
+ BQ(self)->original_boost = 0.0f;
1548
+
1549
+ self->type = BOOLEAN_QUERY;
1550
+ self->rewrite = &bq_rewrite;
1551
+ self->extract_terms = &bq_extract_terms;
1552
+ self->to_s = &bq_to_s;
1553
+ self->hash = &bq_hash;
1554
+ self->eq = &bq_eq;
1555
+ self->destroy_i = &bq_destroy;
1556
+ self->create_weight_i = &bw_new;
1557
+ self->get_matchv_i = &bq_get_matchv_i;
1558
+
1559
+ return self;
1560
+ }
1561
+
1562
+ FrtQuery *frt_bq_new_max(bool coord_disabled, int max)
1563
+ {
1564
+ FrtQuery *q = frt_bq_new(coord_disabled);
1565
+ BQ(q)->max_clause_cnt = max;
1566
+ return q;
1567
+ }
1568
+
1569
+ FrtBooleanClause *frt_bq_add_clause_nr(FrtQuery *self, FrtBooleanClause *bc)
1570
+ {
1571
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1572
+ FRT_RAISE(FRT_STATE_ERROR, "Two many clauses. The max clause limit is set to "
1573
+ "<%d> but your query has <%d> clauses. You can try increasing "
1574
+ ":max_clause_count for the BooleanQuery or using a different "
1575
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1576
+ }
1577
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1578
+ BQ(self)->clause_capa *= 2;
1579
+ FRT_REALLOC_N(BQ(self)->clauses, FrtBooleanClause *, BQ(self)->clause_capa);
1580
+ }
1581
+ BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1582
+ BQ(self)->clause_cnt++;
1583
+ return bc;
1584
+ }
1585
+
1586
+ FrtBooleanClause *frt_bq_add_clause(FrtQuery *self, FrtBooleanClause *bc)
1587
+ {
1588
+ FRT_REF(bc);
1589
+ return frt_bq_add_clause_nr(self, bc);
1590
+ }
1591
+
1592
+ FrtBooleanClause *frt_bq_add_query_nr(FrtQuery *self, FrtQuery *sub_query, FrtBCType occur)
1593
+ {
1594
+ FrtBooleanClause *bc;
1595
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1596
+ FRT_RAISE(FRT_STATE_ERROR, "Two many clauses. The max clause limit is set to "
1597
+ "<%d> but your query has <%d> clauses. You can try increasing "
1598
+ ":max_clause_count for the BooleanQuery or using a different "
1599
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1600
+ }
1601
+ bc = frt_bc_new(sub_query, occur);
1602
+ frt_bq_add_clause(self, bc);
1603
+ frt_bc_deref(bc); /* bc was referenced unnecessarily */
1604
+ return bc;
1605
+ }
1606
+
1607
+ FrtBooleanClause *frt_bq_add_query(FrtQuery *self, FrtQuery *sub_query, FrtBCType occur)
1608
+ {
1609
+ FRT_REF(sub_query);
1610
+ return frt_bq_add_query_nr(self, sub_query, occur);
1611
+ }
1612
+