jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
@@ -0,0 +1,149 @@
1
+ #include <string.h>
2
+ #include "priorityqueue.h"
3
+ #include "internal.h"
4
+
5
+ #define START_CAPA 127
6
+
7
+ PriorityQueue *pq_new(int capa, lt_ft less_than, free_ft free_elem)
8
+ {
9
+ PriorityQueue *pq = ALLOC(PriorityQueue);
10
+ pq->size = 0;
11
+ pq->capa = capa;
12
+ pq->mem_capa = (START_CAPA > capa ? capa : START_CAPA) + 1;
13
+ pq->heap = ALLOC_N(void *, pq->mem_capa);
14
+ pq->less_than_i = less_than;
15
+
16
+ /* need to set this yourself if you want to change it */
17
+ pq->free_elem_i = free_elem ? free_elem : &dummy_free;
18
+ return pq;
19
+ }
20
+
21
+ PriorityQueue *pq_clone(PriorityQueue *pq)
22
+ {
23
+ PriorityQueue *new_pq = ALLOC(PriorityQueue);
24
+ memcpy(new_pq, pq, sizeof(PriorityQueue));
25
+ new_pq->heap = ALLOC_N(void *, new_pq->mem_capa);
26
+ memcpy(new_pq->heap, pq->heap, sizeof(void *) * (new_pq->size + 1));
27
+
28
+ return new_pq;
29
+ }
30
+
31
+ void pq_clear(PriorityQueue *pq)
32
+ {
33
+ int i;
34
+ for (i = 1; i <= pq->size; i++) {
35
+ pq->free_elem_i(pq->heap[i]);
36
+ pq->heap[i] = NULL;
37
+ }
38
+ pq->size = 0;
39
+ }
40
+
41
+ void pq_free(PriorityQueue *pq)
42
+ {
43
+ free(pq->heap);
44
+ free(pq);
45
+ }
46
+
47
+ void pq_destroy(PriorityQueue *pq)
48
+ {
49
+ pq_clear(pq);
50
+ pq_free(pq);
51
+ }
52
+
53
+ /**
54
+ * This method is used internally by pq_push. It is similar to pq_down except
55
+ * that where pq_down reorders the elements from the top, pq_up reorders from
56
+ * the bottom.
57
+ *
58
+ * @param pq the PriorityQueue to reorder
59
+ */
60
+ static void pq_up(PriorityQueue *pq)
61
+ {
62
+ void **heap = pq->heap;
63
+ void *node;
64
+ int i = pq->size;
65
+ int j = i >> 1;
66
+
67
+ node = heap[i];
68
+
69
+ while ((j > 0) && pq->less_than_i(node, heap[j])) {
70
+ heap[i] = heap[j];
71
+ i = j;
72
+ j = j >> 1;
73
+ }
74
+ heap[i] = node;
75
+ }
76
+
77
+ void pq_down(PriorityQueue *pq)
78
+ {
79
+ register int i = 1;
80
+ register int j = 2; /* i << 1; */
81
+ register int k = 3; /* j + 1; */
82
+ register int size = pq->size;
83
+ void **heap = pq->heap;
84
+ void *node = heap[i]; /* save top node */
85
+
86
+ if ((k <= size) && (pq->less_than_i(heap[k], heap[j]))) {
87
+ j = k;
88
+ }
89
+
90
+ while ((j <= size) && pq->less_than_i(heap[j], node)) {
91
+ heap[i] = heap[j]; /* shift up child */
92
+ i = j;
93
+ j = i << 1;
94
+ k = j + 1;
95
+ if ((k <= size) && pq->less_than_i(heap[k], heap[j])) {
96
+ j = k;
97
+ }
98
+ }
99
+ heap[i] = node;
100
+ }
101
+
102
+ void pq_push(PriorityQueue *pq, void *elem)
103
+ {
104
+ pq->size++;
105
+ if (pq->size >= pq->mem_capa) {
106
+ pq->mem_capa <<= 1;
107
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
108
+ }
109
+ pq->heap[pq->size] = elem;
110
+ pq_up(pq);
111
+ }
112
+
113
+ PriorityQueueInsertEnum pq_insert(PriorityQueue *pq,
114
+ void *elem)
115
+ {
116
+ if (pq->size < pq->capa) {
117
+ pq_push(pq, elem);
118
+ return PQ_ADDED;
119
+ }
120
+
121
+ if (pq->size > 0 && pq->less_than_i(pq->heap[1], elem)) {
122
+ pq->free_elem_i(pq->heap[1]);
123
+ pq->heap[1] = elem;
124
+ pq_down(pq);
125
+ return PQ_INSERTED;
126
+ }
127
+
128
+ pq->free_elem_i(elem);
129
+ return PQ_DROPPED;
130
+ }
131
+
132
+ void *pq_top(PriorityQueue *pq)
133
+ {
134
+ return pq->size ? pq->heap[1] : NULL;
135
+ }
136
+
137
+ void *pq_pop(PriorityQueue *pq)
138
+ {
139
+ if (pq->size > 0) {
140
+ void *result = pq->heap[1]; /* save first value */
141
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
142
+ pq->heap[pq->size] = NULL;
143
+ pq->size--;
144
+ pq_down(pq); /* adjust heap */
145
+ return result;
146
+ }
147
+ return NULL;
148
+ }
149
+
@@ -0,0 +1,155 @@
1
+ #ifndef FRT_PRIORITYQUEUE_H
2
+ #define FRT_PRIORITYQUEUE_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "global.h"
9
+
10
+ typedef bool (*frt_lt_ft)(const void *p1, const void *p2);
11
+
12
+ /**
13
+ * A PriorityQueue has a fixed size and contains a less_than function and a
14
+ * free_elem function specific to the data type to be stored in the queue.
15
+ */
16
+ typedef struct FrtPriorityQueue
17
+ {
18
+ int size;
19
+ int capa;
20
+ int mem_capa;
21
+ void **heap;
22
+ frt_lt_ft less_than_i;
23
+ frt_free_ft free_elem_i;
24
+ } FrtPriorityQueue;
25
+
26
+ /**
27
+ * Create a new PriorityQueue setting the less_than and free_elem for this
28
+ * specific PriorityQueue.
29
+ *
30
+ * @param capa the capacity of the PriorityQueue. As more than the capacity is
31
+ * added to the queue the least valued elements drop off the bottom.
32
+ * @param less_than the function to determine whether one value is less than
33
+ * another for this particular PriorityQueue
34
+ * @param free_elem the function to free the elements in the PriorityQueue
35
+ * when it is destroyed or there is insertion overflow
36
+ * @return a newly allocated PriorityQueue
37
+ */
38
+ extern FrtPriorityQueue *frt_pq_new(int capa,
39
+ frt_lt_ft less_than,
40
+ frt_free_ft free_elem);
41
+
42
+ /**
43
+ * Allocate a clone of the PriorityQueue. This can be used if you want to scan
44
+ * through all elements of the PriorityQueue but you don't want to have to
45
+ * remove them all and add them all again.
46
+ *
47
+ * @param pq the priority queue to clone
48
+ * @return a clone of the original priority queue
49
+ */
50
+ extern FrtPriorityQueue *frt_pq_clone(FrtPriorityQueue *pq);
51
+
52
+ /**
53
+ * Clear all elements from the PriorityQueue and reset the size to 0. When
54
+ * the elements are removed from the PriorityQueue, free_elem is used to free
55
+ * them, unless it was set to NULL in which case nothing will happen to them.
56
+ *
57
+ * @param self the PriorityQueue to clear
58
+ */
59
+ extern void frt_pq_clear(FrtPriorityQueue *self);
60
+
61
+ /**
62
+ * Free the memory allocated to the PriorityQueue. This function does nothing
63
+ * to the elements in the PriorityQueue itself. To destroy them also, use
64
+ * pq_destroy.
65
+ *
66
+ * @param self the PriorityQueue to free
67
+ */
68
+ extern void frt_pq_free(FrtPriorityQueue *self);
69
+
70
+ /**
71
+ * Destroy the PriorityQueue, freeing all memory allocated to it and also
72
+ * destroying all the elements contained by it. This method is equivalent to
73
+ * calling pq_clear followed by pq_free.
74
+ *
75
+ * @param self the PriorityQueue to destroy
76
+ */
77
+ extern void frt_pq_destroy(FrtPriorityQueue *self);
78
+
79
+ /**
80
+ * Reorder the PriorityQueue after the top element has been modified. This
81
+ * method is used especially when the PriorityQueue contains a queue of
82
+ * iterators. When the top iterator is incremented you should call this
83
+ * method.
84
+ *
85
+ * @param self the PriorityQueue to reorder
86
+ */
87
+ extern void frt_pq_down(FrtPriorityQueue *self);
88
+
89
+ /**
90
+ * Add another element to the PriorityQueue. This method should only be used
91
+ * when the PriorityQueue has enough space allocated to hold all elements
92
+ * added. If there is a chance that you will add more than the amount you have
93
+ * allocated then you should use pq_insert. pq_insert will handle insertion
94
+ * overflow.
95
+ *
96
+ * @param self the PriorityQueue to add the element to
97
+ * @param elem the element to add to the PriorityQueue
98
+ */
99
+ extern void frt_pq_push(FrtPriorityQueue *self, void *elem);
100
+
101
/* Result codes returned by the priority queue insert function. */
typedef enum {
    FRT_PQ_DROPPED  = 0, /* element was not stored */
    FRT_PQ_ADDED    = 1, /* element was stored, nothing was evicted */
    FRT_PQ_INSERTED = 2  /* element was stored after another was evicted */
} FrtPriorityQueueInsertEnum;
106
+
107
+ /**
108
+ * Add another element to the PriorityQueue. Unlike pq_push, this method
109
+ * handles insertion overflow. That is, when you insert more elements than the
110
+ * capacity of the PriorityQueue, the elements are dropped off the bottom and
111
+ * freed using the free_elem function.
112
+ *
113
+ * @param self the PriorityQueue to add the element to
114
+ * @param elem the element to add to the PriorityQueue
115
+ * @returns one of three values;
116
+ * <pre>
117
+ * 0 == PQ_DROPPED the element was too small (according to the less_than
118
+ * function) so it was destroyed
119
+ * 1 == PQ_ADDED the element was successfully added
120
+ * 2 == PQ_INSERTED the element was successfully added after another
121
+ * element was dropped and destroyed
122
+ * </pre>
123
+ */
124
+ extern FrtPriorityQueueInsertEnum frt_pq_insert(FrtPriorityQueue *self,
125
+ void *elem);
126
+
127
+ /**
128
+ * Get the top element in the PriorityQueue.
129
+ *
130
+ * @param self the PriorityQueue to get the top from
131
+ * @return the top element in the PriorityQueue
132
+ */
133
+ extern void *frt_pq_top(FrtPriorityQueue *self);
134
+
135
+ /**
136
+ * Remove and return the top element in the PriorityQueue.
137
+ *
138
+ * @param self the PriorityQueue to get the top from
139
+ * @return the top element in the PriorityQueue
140
+ */
141
+ extern void *frt_pq_pop(FrtPriorityQueue *self);
142
+
143
+ /**
144
+ * Return true if the PriorityQueue is full.
145
+ *
146
+ * @param self the PriorityQueue to test
147
+ * @return true if the PriorityQueue is full.
148
+ */
149
+ #define frt_pq_full(pq) ((pq)->size == (pq)->capa)
150
+
151
+ #ifdef __cplusplus
152
+ } // extern "C"
153
+ #endif
154
+
155
+ #endif
data/ext/q_boolean.c ADDED
@@ -0,0 +1,1621 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+ #include "array.h"
4
+ #include "internal.h"
5
+
6
+ #define BQ(query) ((BooleanQuery *)(query))
7
+ #define BW(weight) ((BooleanWeight *)(weight))
8
+
9
+ /***************************************************************************
10
+ *
11
+ * BooleanScorer
12
+ *
13
+ ***************************************************************************/
14
+
15
+ /***************************************************************************
16
+ * Coordinator
17
+ ***************************************************************************/
18
+
19
+ typedef struct Coordinator
20
+ {
21
+ int max_coord;
22
+ float *coord_factors;
23
+ Similarity *similarity;
24
+ int num_matches;
25
+ } Coordinator;
26
+
27
+ static Coordinator *coord_new(Similarity *similarity)
28
+ {
29
+ Coordinator *self = ALLOC_AND_ZERO(Coordinator);
30
+ self->similarity = similarity;
31
+ return self;
32
+ }
33
+
34
+ static Coordinator *coord_init(Coordinator *self)
35
+ {
36
+ int i;
37
+ self->coord_factors = ALLOC_N(float, self->max_coord + 1);
38
+
39
+ for (i = 0; i <= self->max_coord; i++) {
40
+ self->coord_factors[i]
41
+ = sim_coord(self->similarity, i, self->max_coord);
42
+ }
43
+
44
+ return self;
45
+ }
46
+
47
+ /***************************************************************************
48
+ * DisjunctionSumScorer
49
+ ***************************************************************************/
50
+
51
+ #define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
52
+
53
+ typedef struct DisjunctionSumScorer
54
+ {
55
+ Scorer super;
56
+ float cum_score;
57
+ int num_matches;
58
+ int min_num_matches;
59
+ Scorer **sub_scorers;
60
+ int ss_cnt;
61
+ PriorityQueue *scorer_queue;
62
+ Coordinator *coordinator;
63
+ } DisjunctionSumScorer;
64
+
65
+ static float dssc_score(Scorer *self)
66
+ {
67
+ return DSSc(self)->cum_score;
68
+ }
69
+
70
+ static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
71
+ {
72
+ int i;
73
+ Scorer *sub_scorer;
74
+ PriorityQueue *pq = dssc->scorer_queue
75
+ = pq_new(dssc->ss_cnt, (lt_ft)&scorer_doc_less_than, NULL);
76
+
77
+ for (i = 0; i < dssc->ss_cnt; i++) {
78
+ sub_scorer = dssc->sub_scorers[i];
79
+ if (sub_scorer->next(sub_scorer)) {
80
+ pq_insert(pq, sub_scorer);
81
+ }
82
+ }
83
+ }
84
+
85
+ static bool dssc_advance_after_current(Scorer *self)
86
+ {
87
+ DisjunctionSumScorer *dssc = DSSc(self);
88
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
89
+
90
+ /* repeat until minimum number of matches is found */
91
+ while (true) {
92
+ Scorer *top = (Scorer *)pq_top(scorer_queue);
93
+ self->doc = top->doc;
94
+ dssc->cum_score = top->score(top);
95
+ dssc->num_matches = 1;
96
+ /* Until all sub-scorers are after self->doc */
97
+ while (true) {
98
+ if (top->next(top)) {
99
+ pq_down(scorer_queue);
100
+ }
101
+ else {
102
+ pq_pop(scorer_queue);
103
+ if (scorer_queue->size
104
+ < (dssc->min_num_matches - dssc->num_matches)) {
105
+ /* Not enough subscorers left for a match on this
106
+ * document, also no more chance of any further match */
107
+ return false;
108
+ }
109
+ if (scorer_queue->size == 0) {
110
+ /* nothing more to advance, check for last match. */
111
+ break;
112
+ }
113
+ }
114
+ top = (Scorer *)pq_top(scorer_queue);
115
+ if (top->doc != self->doc) {
116
+ /* All remaining subscorers are after self->doc */
117
+ break;
118
+ }
119
+ else {
120
+ dssc->cum_score += top->score(top);
121
+ dssc->num_matches++;
122
+ }
123
+ }
124
+
125
+ if (dssc->num_matches >= dssc->min_num_matches) {
126
+ return true;
127
+ }
128
+ else if (scorer_queue->size < dssc->min_num_matches) {
129
+ return false;
130
+ }
131
+ }
132
+ }
133
+
134
+ static bool dssc_next(Scorer *self)
135
+ {
136
+ if (DSSc(self)->scorer_queue == NULL) {
137
+ dssc_init_scorer_queue(DSSc(self));
138
+ }
139
+
140
+ if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
141
+ return false;
142
+ }
143
+ else {
144
+ return dssc_advance_after_current(self);
145
+ }
146
+ }
147
+
148
+ static bool dssc_skip_to(Scorer *self, int doc_num)
149
+ {
150
+ DisjunctionSumScorer *dssc = DSSc(self);
151
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
152
+
153
+ if (scorer_queue == NULL) {
154
+ dssc_init_scorer_queue(dssc);
155
+ scorer_queue = dssc->scorer_queue;
156
+ }
157
+
158
+ if (scorer_queue->size < dssc->min_num_matches) {
159
+ return false;
160
+ }
161
+ if (doc_num <= self->doc) {
162
+ doc_num = self->doc + 1;
163
+ }
164
+ while (true) {
165
+ Scorer *top = (Scorer *)pq_top(scorer_queue);
166
+ if (top->doc >= doc_num) {
167
+ return dssc_advance_after_current(self);
168
+ }
169
+ else if (top->skip_to(top, doc_num)) {
170
+ pq_down(scorer_queue);
171
+ }
172
+ else {
173
+ pq_pop(scorer_queue);
174
+ if (scorer_queue->size < dssc->min_num_matches) {
175
+ return false;
176
+ }
177
+ }
178
+ }
179
+ }
180
+
181
+ static Explanation *dssc_explain(Scorer *self, int doc_num)
182
+ {
183
+ int i;
184
+ DisjunctionSumScorer *dssc = DSSc(self);
185
+ Scorer *sub_scorer;
186
+ Explanation *e
187
+ = expl_new(0.0, "At least %d of:", dssc->min_num_matches);
188
+ for (i = 0; i < dssc->ss_cnt; i++) {
189
+ sub_scorer = dssc->sub_scorers[i];
190
+ expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
191
+ }
192
+ return e;
193
+ }
194
+
195
+ static void dssc_destroy(Scorer *self)
196
+ {
197
+ DisjunctionSumScorer *dssc = DSSc(self);
198
+ int i;
199
+ for (i = 0; i < dssc->ss_cnt; i++) {
200
+ dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
201
+ }
202
+ if (dssc->scorer_queue) {
203
+ pq_destroy(dssc->scorer_queue);
204
+ }
205
+ scorer_destroy_i(self);
206
+ }
207
+
208
+ static Scorer *disjunction_sum_scorer_new(Scorer **sub_scorers, int ss_cnt,
209
+ int min_num_matches)
210
+ {
211
+ Scorer *self = scorer_new(DisjunctionSumScorer, NULL);
212
+ DSSc(self)->ss_cnt = ss_cnt;
213
+
214
+ /* The document number of the current match */
215
+ self->doc = -1;
216
+ DSSc(self)->cum_score = -1.0;
217
+
218
+ /* The number of subscorers that provide the current match. */
219
+ DSSc(self)->num_matches = -1;
220
+ DSSc(self)->coordinator = NULL;
221
+
222
+ #ifdef DEBUG
223
+ if (min_num_matches <= 0) {
224
+ RAISE(ARG_ERROR, "The min_num_matches value <%d> should not be less "
225
+ "than 0\n", min_num_matches);
226
+ }
227
+ if (ss_cnt <= 1) {
228
+ RAISE(ARG_ERROR, "There should be at least 2 sub_scorers in a "
229
+ "DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
230
+ }
231
+ #endif
232
+
233
+ DSSc(self)->min_num_matches = min_num_matches;
234
+ DSSc(self)->sub_scorers = sub_scorers;
235
+ DSSc(self)->scorer_queue = NULL;
236
+
237
+ self->score = &dssc_score;
238
+ self->next = &dssc_next;
239
+ self->skip_to = &dssc_skip_to;
240
+ self->explain = &dssc_explain;
241
+ self->destroy = &dssc_destroy;
242
+
243
+ return self;
244
+ }
245
+
246
+ static float cdssc_score(Scorer *self)
247
+ {
248
+ DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
249
+ return DSSc(self)->cum_score;
250
+ }
251
+
252
+ static Scorer *counting_disjunction_sum_scorer_new(
253
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt,
254
+ int min_num_matches)
255
+ {
256
+ Scorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
257
+ min_num_matches);
258
+ DSSc(self)->coordinator = coordinator;
259
+ self->score = &cdssc_score;
260
+ return self;
261
+ }
262
+
263
+ /***************************************************************************
264
+ * ConjunctionScorer
265
+ ***************************************************************************/
266
+
267
+ #define CSc(scorer) ((ConjunctionScorer *)(scorer))
268
+
269
+ typedef struct ConjunctionScorer
270
+ {
271
+ Scorer super;
272
+ bool first_time : 1;
273
+ bool more : 1;
274
+ float coord;
275
+ Scorer **sub_scorers;
276
+ int ss_cnt;
277
+ int first_idx;
278
+ Coordinator *coordinator;
279
+ int last_scored_doc;
280
+ } ConjunctionScorer;
281
+
282
+ static void csc_sort_scorers(ConjunctionScorer *csc)
283
+ {
284
+ int i;
285
+ Scorer *current = csc->sub_scorers[0], *previous;
286
+ for (i = 1; i < csc->ss_cnt; i++) {
287
+ previous = current;
288
+ current = csc->sub_scorers[i];
289
+ if (previous->doc > current->doc) {
290
+ if (!current->skip_to(current, previous->doc)) {
291
+ csc->more = false;
292
+ return;
293
+ }
294
+ }
295
+ }
296
+ /*qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);*/
297
+ csc->first_idx = 0;
298
+ }
299
+
300
+ static void csc_init(Scorer *self, bool init_scorers)
301
+ {
302
+ ConjunctionScorer *csc = CSc(self);
303
+ const int sub_sc_cnt = csc->ss_cnt;
304
+
305
+ /* compute coord factor */
306
+ csc->coord = sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
307
+
308
+ csc->more = (sub_sc_cnt > 0);
309
+
310
+ if (init_scorers) {
311
+ int i;
312
+ /* move each scorer to its first entry */
313
+ for (i = 0; i < sub_sc_cnt; i++) {
314
+ Scorer *sub_scorer = csc->sub_scorers[i];
315
+ if (!csc->more) {
316
+ break;
317
+ }
318
+ csc->more = sub_scorer->next(sub_scorer);
319
+ }
320
+ if (csc->more) {
321
+ csc_sort_scorers(csc);
322
+ }
323
+ }
324
+
325
+ csc->first_time = false;
326
+ }
327
+
328
+ static float csc_score(Scorer *self)
329
+ {
330
+ ConjunctionScorer *csc = CSc(self);
331
+ const int sub_sc_cnt = csc->ss_cnt;
332
+ float score = 0.0; /* sum scores */
333
+ int i;
334
+ for (i = 0; i < sub_sc_cnt; i++) {
335
+ Scorer *sub_scorer = csc->sub_scorers[i];
336
+ score += sub_scorer->score(sub_scorer);
337
+ }
338
+ score *= csc->coord;
339
+ return score;
340
+ }
341
+
342
+ static bool csc_do_next(Scorer *self)
343
+ {
344
+ ConjunctionScorer *csc = CSc(self);
345
+ const int sub_sc_cnt = csc->ss_cnt;
346
+ int first_idx = csc->first_idx;
347
+ Scorer *first_sc = csc->sub_scorers[first_idx];
348
+ Scorer *last_sc = csc->sub_scorers[PREV_NUM(first_idx, sub_sc_cnt)];
349
+
350
+ /* skip to doc with all clauses */
351
+ while (csc->more && (first_sc->doc < last_sc->doc)) {
352
+ /* skip first upto last */
353
+ csc->more = first_sc->skip_to(first_sc, last_sc->doc);
354
+ /* move first to last */
355
+ last_sc = first_sc;
356
+ first_idx = NEXT_NUM(first_idx, sub_sc_cnt);
357
+ first_sc = csc->sub_scorers[first_idx];
358
+ }
359
+ self->doc = first_sc->doc;
360
+ csc->first_idx = first_idx;
361
+ return csc->more;
362
+ }
363
+
364
+ static bool csc_next(Scorer *self)
365
+ {
366
+ ConjunctionScorer *csc = CSc(self);
367
+ if (csc->first_time) {
368
+ csc_init(self, true);
369
+ }
370
+ else if (csc->more) {
371
+ /* trigger further scanning */
372
+ const int last_idx = PREV_NUM(csc->first_idx, csc->ss_cnt);
373
+ Scorer *sub_scorer = csc->sub_scorers[last_idx];
374
+ csc->more = sub_scorer->next(sub_scorer);
375
+ }
376
+ return csc_do_next(self);
377
+ }
378
+
379
+ static bool csc_skip_to(Scorer *self, int doc_num)
380
+ {
381
+ ConjunctionScorer *csc = CSc(self);
382
+ const int sub_sc_cnt = csc->ss_cnt;
383
+ int i;
384
+ bool more = csc->more;
385
+
386
+ if (csc->first_time) {
387
+ csc_init(self, false);
388
+ }
389
+
390
+ for (i = 0; i < sub_sc_cnt; i++) {
391
+ if (!more) {
392
+ break;
393
+ }
394
+ else {
395
+ Scorer *sub_scorer = csc->sub_scorers[i];
396
+ more = sub_scorer->skip_to(sub_scorer, doc_num);
397
+ }
398
+ }
399
+ if (more) {
400
+ /* resort the scorers */
401
+ csc_sort_scorers(csc);
402
+ }
403
+
404
+ csc->more = more;
405
+ return csc_do_next(self);
406
+ }
407
+
408
+ static void csc_destroy(Scorer *self)
409
+ {
410
+ ConjunctionScorer *csc = CSc(self);
411
+ const int sub_sc_cnt = csc->ss_cnt;
412
+ int i;
413
+ for (i = 0; i < sub_sc_cnt; i++) {
414
+ csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
415
+ }
416
+ free(csc->sub_scorers);
417
+ scorer_destroy_i(self);
418
+ }
419
+
420
+ static Scorer *conjunction_scorer_new(Similarity *similarity)
421
+ {
422
+ Scorer *self = scorer_new(ConjunctionScorer, similarity);
423
+
424
+ CSc(self)->first_time = true;
425
+ CSc(self)->more = true;
426
+ CSc(self)->coordinator = NULL;
427
+
428
+ self->score = &csc_score;
429
+ self->next = &csc_next;
430
+ self->skip_to = &csc_skip_to;
431
+ self->destroy = &csc_destroy;
432
+
433
+ return self;
434
+ }
435
+
436
+ static float ccsc_score(Scorer *self)
437
+ {
438
+ ConjunctionScorer *csc = CSc(self);
439
+
440
+ int doc;
441
+ if ((doc = self->doc) > csc->last_scored_doc) {
442
+ csc->last_scored_doc = doc;
443
+ csc->coordinator->num_matches += csc->ss_cnt;
444
+ }
445
+
446
+ return csc_score(self);
447
+ }
448
+
449
+ static Scorer *counting_conjunction_sum_scorer_new(
450
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt)
451
+ {
452
+ Scorer *self = conjunction_scorer_new(sim_create_default());
453
+ ConjunctionScorer *csc = CSc(self);
454
+ csc->coordinator = coordinator;
455
+ csc->last_scored_doc = -1;
456
+ csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
457
+ memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
458
+ csc->ss_cnt = ss_cnt;
459
+
460
+ self->score = &ccsc_score;
461
+
462
+ return self;
463
+ }
464
+
465
+ /***************************************************************************
466
+ * SingleMatchScorer
467
+ ***************************************************************************/
468
+
469
+ #define SMSc(scorer) ((SingleMatchScorer *)(scorer))
470
+
471
+ typedef struct SingleMatchScorer
472
+ {
473
+ Scorer super;
474
+ Coordinator *coordinator;
475
+ Scorer *scorer;
476
+ } SingleMatchScorer;
477
+
478
+
479
+ static float smsc_score(Scorer *self)
480
+ {
481
+ SMSc(self)->coordinator->num_matches++;
482
+ return SMSc(self)->scorer->score(SMSc(self)->scorer);
483
+ }
484
+
485
+ static bool smsc_next(Scorer *self)
486
+ {
487
+ Scorer *scorer = SMSc(self)->scorer;
488
+ if (scorer->next(scorer)) {
489
+ self->doc = scorer->doc;
490
+ return true;
491
+ }
492
+ return false;
493
+ }
494
+
495
+ static bool smsc_skip_to(Scorer *self, int doc_num)
496
+ {
497
+ Scorer *scorer = SMSc(self)->scorer;
498
+ if (scorer->skip_to(scorer, doc_num)) {
499
+ self->doc = scorer->doc;
500
+ return true;
501
+ }
502
+ return false;
503
+ }
504
+
505
+ static Explanation *smsc_explain(Scorer *self, int doc_num)
506
+ {
507
+ Scorer *scorer = SMSc(self)->scorer;
508
+ return scorer->explain(scorer, doc_num);
509
+ }
510
+
511
+ static void smsc_destroy(Scorer *self)
512
+ {
513
+ Scorer *scorer = SMSc(self)->scorer;
514
+ scorer->destroy(scorer);
515
+ scorer_destroy_i(self);
516
+ }
517
+
518
+ static Scorer *single_match_scorer_new(Coordinator *coordinator,
519
+ Scorer *scorer)
520
+ {
521
+ Scorer *self = scorer_new(SingleMatchScorer, scorer->similarity);
522
+ SMSc(self)->coordinator = coordinator;
523
+ SMSc(self)->scorer = scorer;
524
+
525
+ self->score = &smsc_score;
526
+ self->next = &smsc_next;
527
+ self->skip_to = &smsc_skip_to;
528
+ self->explain = &smsc_explain;
529
+ self->destroy = &smsc_destroy;
530
+ return self;
531
+ }
532
+
533
+ /***************************************************************************
534
+ * ReqOptSumScorer
535
+ ***************************************************************************/
536
+
537
+ #define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
538
+
539
+ typedef struct ReqOptSumScorer
540
+ {
541
+ Scorer super;
542
+ Scorer *req_scorer;
543
+ Scorer *opt_scorer;
544
+ bool first_time_opt;
545
+ } ReqOptSumScorer;
546
+
547
+ static float rossc_score(Scorer *self)
548
+ {
549
+ ReqOptSumScorer *rossc = ROSSc(self);
550
+ Scorer *req_scorer = rossc->req_scorer;
551
+ Scorer *opt_scorer = rossc->opt_scorer;
552
+ int cur_doc = req_scorer->doc;
553
+ float req_score = req_scorer->score(req_scorer);
554
+
555
+ if (rossc->first_time_opt) {
556
+ rossc->first_time_opt = false;
557
+ if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
558
+ SCORER_NULLIFY(rossc->opt_scorer);
559
+ return req_score;
560
+ }
561
+ }
562
+ else if (opt_scorer == NULL) {
563
+ return req_score;
564
+ }
565
+ else if ((opt_scorer->doc < cur_doc)
566
+ && ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
567
+ SCORER_NULLIFY(rossc->opt_scorer);
568
+ return req_score;
569
+ }
570
+ /* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
571
+ return (opt_scorer->doc == cur_doc)
572
+ ? req_score + opt_scorer->score(opt_scorer)
573
+ : req_score;
574
+ }
575
+
576
+ static bool rossc_next(Scorer *self)
577
+ {
578
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
579
+ if (req_scorer->next(req_scorer)) {
580
+ self->doc = req_scorer->doc;
581
+ return true;
582
+ }
583
+ return false;
584
+ }
585
+
586
+ static bool rossc_skip_to(Scorer *self, int doc_num)
587
+ {
588
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
589
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
590
+ self->doc = req_scorer->doc;
591
+ return true;
592
+ }
593
+ return false;
594
+ }
595
+
596
+ static Explanation *rossc_explain(Scorer *self, int doc_num)
597
+ {
598
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
599
+ Scorer *opt_scorer = ROSSc(self)->opt_scorer;
600
+
601
+ Explanation *e = expl_new(self->score(self),"required, optional:");
602
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
603
+ expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
604
+ return e;
605
+ }
606
+
607
+ static void rossc_destroy(Scorer *self)
608
+ {
609
+ ReqOptSumScorer *rossc = ROSSc(self);
610
+ if (rossc->req_scorer) {
611
+ rossc->req_scorer->destroy(rossc->req_scorer);
612
+ }
613
+ if (rossc->opt_scorer) {
614
+ rossc->opt_scorer->destroy(rossc->opt_scorer);
615
+ }
616
+ scorer_destroy_i(self);
617
+ }
618
+
619
+
620
+ static Scorer *req_opt_sum_scorer_new(Scorer *req_scorer, Scorer *opt_scorer)
621
+ {
622
+ Scorer *self = scorer_new(ReqOptSumScorer, NULL);
623
+
624
+ ROSSc(self)->req_scorer = req_scorer;
625
+ ROSSc(self)->opt_scorer = opt_scorer;
626
+ ROSSc(self)->first_time_opt = true;
627
+
628
+ self->score = &rossc_score;
629
+ self->next = &rossc_next;
630
+ self->skip_to = &rossc_skip_to;
631
+ self->explain = &rossc_explain;
632
+ self->destroy = &rossc_destroy;
633
+
634
+ return self;
635
+ }
636
+
637
+ /***************************************************************************
638
+ * ReqExclScorer
639
+ ***************************************************************************/
640
+
641
+ #define RXSc(scorer) ((ReqExclScorer *)(scorer))
642
+ typedef struct ReqExclScorer
643
+ {
644
+ Scorer super;
645
+ Scorer *req_scorer;
646
+ Scorer *excl_scorer;
647
+ bool first_time;
648
+ } ReqExclScorer;
649
+
650
+ static bool rxsc_to_non_excluded(Scorer *self)
651
+ {
652
+ Scorer *req_scorer = RXSc(self)->req_scorer;
653
+ Scorer *excl_scorer = RXSc(self)->excl_scorer;
654
+ int excl_doc = excl_scorer->doc, req_doc;
655
+
656
+ do {
657
+ /* may be excluded */
658
+ req_doc = req_scorer->doc;
659
+ if (req_doc < excl_doc) {
660
+ /* req_scorer advanced to before excl_scorer, ie. not excluded */
661
+ self->doc = req_doc;
662
+ return true;
663
+ }
664
+ else if (req_doc > excl_doc) {
665
+ if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
666
+ /* emptied, no more exclusions */
667
+ SCORER_NULLIFY(RXSc(self)->excl_scorer);
668
+ self->doc = req_doc;
669
+ return true;
670
+ }
671
+ excl_doc = excl_scorer->doc;
672
+ if (excl_doc > req_doc) {
673
+ self->doc = req_doc;
674
+ return true; /* not excluded */
675
+ }
676
+ }
677
+ } while (req_scorer->next(req_scorer));
678
+ /* emptied, nothing left */
679
+ SCORER_NULLIFY(RXSc(self)->req_scorer);
680
+ return false;
681
+ }
682
+
683
+ static bool rxsc_next(Scorer *self)
684
+ {
685
+ ReqExclScorer *rxsc = RXSc(self);
686
+ Scorer *req_scorer = rxsc->req_scorer;
687
+ Scorer *excl_scorer = rxsc->excl_scorer;
688
+
689
+ if (rxsc->first_time) {
690
+ if (! excl_scorer->next(excl_scorer)) {
691
+ /* emptied at start */
692
+ SCORER_NULLIFY(rxsc->excl_scorer);
693
+ excl_scorer = NULL;
694
+ }
695
+ rxsc->first_time = false;
696
+ }
697
+ if (req_scorer == NULL) {
698
+ return false;
699
+ }
700
+ if (! req_scorer->next(req_scorer)) {
701
+ /* emptied, nothing left */
702
+ SCORER_NULLIFY(rxsc->req_scorer);
703
+ return false;
704
+ }
705
+ if (excl_scorer == NULL) {
706
+ self->doc = req_scorer->doc;
707
+ /* req_scorer->next() already returned true */
708
+ return true;
709
+ }
710
+ return rxsc_to_non_excluded(self);
711
+ }
712
+
713
+ static bool rxsc_skip_to(Scorer *self, int doc_num)
714
+ {
715
+ ReqExclScorer *rxsc = RXSc(self);
716
+ Scorer *req_scorer = rxsc->req_scorer;
717
+ Scorer *excl_scorer = rxsc->excl_scorer;
718
+
719
+ if (rxsc->first_time) {
720
+ rxsc->first_time = false;
721
+ if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
722
+ /* emptied */
723
+ SCORER_NULLIFY(rxsc->excl_scorer);
724
+ excl_scorer = NULL;
725
+ }
726
+ }
727
+ if (req_scorer == NULL) {
728
+ return false;
729
+ }
730
+ if (excl_scorer == NULL) {
731
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
732
+ self->doc = req_scorer->doc;
733
+ return true;
734
+ }
735
+ return false;
736
+ }
737
+ if (! req_scorer->skip_to(req_scorer, doc_num)) {
738
+ SCORER_NULLIFY(rxsc->req_scorer);
739
+ return false;
740
+ }
741
+ return rxsc_to_non_excluded(self);
742
+ }
743
+
744
+ static float rxsc_score(Scorer *self)
745
+ {
746
+ Scorer *req_scorer = RXSc(self)->req_scorer;
747
+ return req_scorer->score(req_scorer);
748
+ }
749
+
750
+ static Explanation *rxsc_explain(Scorer *self, int doc_num)
751
+ {
752
+ ReqExclScorer *rxsc = RXSc(self);
753
+ Scorer *req_scorer = rxsc->req_scorer;
754
+ Scorer *excl_scorer = rxsc->excl_scorer;
755
+ Explanation *e;
756
+
757
+ if (excl_scorer->skip_to(excl_scorer, doc_num)
758
+ && excl_scorer->doc == doc_num) {
759
+ e = expl_new(0.0, "excluded:");
760
+ }
761
+ else {
762
+ e = expl_new(0.0, "not excluded:");
763
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
764
+ }
765
+ return e;
766
+ }
767
+
768
+ static void rxsc_destroy(Scorer *self)
769
+ {
770
+ ReqExclScorer *rxsc = RXSc(self);
771
+ if (rxsc->req_scorer) {
772
+ rxsc->req_scorer->destroy(rxsc->req_scorer);
773
+ }
774
+ if (rxsc->excl_scorer) {
775
+ rxsc->excl_scorer->destroy(rxsc->excl_scorer);
776
+ }
777
+ scorer_destroy_i(self);
778
+ }
779
+
780
+ static Scorer *req_excl_scorer_new(Scorer *req_scorer, Scorer *excl_scorer)
781
+ {
782
+ Scorer *self = scorer_new(ReqExclScorer, NULL);
783
+ RXSc(self)->req_scorer = req_scorer;
784
+ RXSc(self)->excl_scorer = excl_scorer;
785
+ RXSc(self)->first_time = true;
786
+
787
+ self->score = &rxsc_score;
788
+ self->next = &rxsc_next;
789
+ self->skip_to = &rxsc_skip_to;
790
+ self->explain = &rxsc_explain;
791
+ self->destroy = &rxsc_destroy;
792
+
793
+ return self;
794
+ }
795
+
796
+ /***************************************************************************
797
+ * NonMatchScorer
798
+ ***************************************************************************/
799
+
800
+ static float nmsc_score(Scorer *self)
801
+ {
802
+ (void)self;
803
+ return 0.0;
804
+ }
805
+
806
+ static bool nmsc_next(Scorer *self)
807
+ {
808
+ (void)self;
809
+ return false;
810
+ }
811
+
812
+ static bool nmsc_skip_to(Scorer *self, int doc_num)
813
+ {
814
+ (void)self; (void)doc_num;
815
+ return false;
816
+ }
817
+
818
+ static Explanation *nmsc_explain(Scorer *self, int doc_num)
819
+ {
820
+ (void)self; (void)doc_num;
821
+ return expl_new(0.0, "No documents matched");
822
+ }
823
+
824
+ static Scorer *non_matching_scorer_new()
825
+ {
826
+ Scorer *self = scorer_new(Scorer, NULL);
827
+ self->score = &nmsc_score;
828
+ self->next = &nmsc_next;
829
+ self->skip_to = &nmsc_skip_to;
830
+ self->explain = &nmsc_explain;
831
+
832
+ return self;
833
+ }
834
+
835
+ /***************************************************************************
836
+ * BooleanScorer
837
+ ***************************************************************************/
838
+
839
+ #define BSc(scorer) ((BooleanScorer *)(scorer))
840
+ typedef struct BooleanScorer
841
+ {
842
+ Scorer super;
843
+ Scorer **required_scorers;
844
+ int rs_cnt;
845
+ int rs_capa;
846
+ Scorer **optional_scorers;
847
+ int os_cnt;
848
+ int os_capa;
849
+ Scorer **prohibited_scorers;
850
+ int ps_cnt;
851
+ int ps_capa;
852
+ Scorer *counting_sum_scorer;
853
+ Coordinator *coordinator;
854
+ } BooleanScorer;
855
+
856
+ static Scorer *counting_sum_scorer_create3(BooleanScorer *bsc,
857
+ Scorer *req_scorer,
858
+ Scorer *opt_scorer)
859
+ {
860
+ if (bsc->ps_cnt == 0) {
861
+ /* no prohibited */
862
+ return req_opt_sum_scorer_new(req_scorer, opt_scorer);
863
+ }
864
+ else if (bsc->ps_cnt == 1) {
865
+ /* 1 prohibited */
866
+ return req_opt_sum_scorer_new(
867
+ req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
868
+ opt_scorer);
869
+ }
870
+ else {
871
+ /* more prohibited */
872
+ return req_opt_sum_scorer_new(
873
+ req_excl_scorer_new(
874
+ req_scorer,
875
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
876
+ bsc->ps_cnt, 1)),
877
+ opt_scorer);
878
+ }
879
+ }
880
+
881
+ static Scorer *counting_sum_scorer_create2(BooleanScorer *bsc,
882
+ Scorer *req_scorer,
883
+ Scorer **optional_scorers,
884
+ int os_cnt)
885
+ {
886
+ if (os_cnt == 0) {
887
+ if (bsc->ps_cnt == 0) {
888
+ return req_scorer;
889
+ }
890
+ else if (bsc->ps_cnt == 1) {
891
+ return req_excl_scorer_new(req_scorer,
892
+ bsc->prohibited_scorers[0]);
893
+ }
894
+ else {
895
+ /* no optional, more than 1 prohibited */
896
+ return req_excl_scorer_new(
897
+ req_scorer,
898
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
899
+ bsc->ps_cnt, 1));
900
+ }
901
+ }
902
+ else if (os_cnt == 1) {
903
+ return counting_sum_scorer_create3(
904
+ bsc,
905
+ req_scorer,
906
+ single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
907
+ }
908
+ else {
909
+ /* more optional */
910
+ return counting_sum_scorer_create3(
911
+ bsc,
912
+ req_scorer,
913
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
914
+ optional_scorers, os_cnt, 1));
915
+ }
916
+ }
917
+
918
+ static Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
919
+ {
920
+ if (bsc->rs_cnt == 0) {
921
+ if (bsc->os_cnt == 0) {
922
+ int i;
923
+ /* only prohibited scorers so return non_matching scorer */
924
+ for (i = 0; i < bsc->ps_cnt; i++) {
925
+ bsc->prohibited_scorers[i]->destroy(
926
+ bsc->prohibited_scorers[i]);
927
+ }
928
+ return non_matching_scorer_new();
929
+ }
930
+ else if (bsc->os_cnt == 1) {
931
+ /* the only optional scorer is required */
932
+ return counting_sum_scorer_create2(
933
+ bsc,
934
+ single_match_scorer_new(bsc->coordinator,
935
+ bsc->optional_scorers[0]),
936
+ NULL, 0); /* no optional scorers left */
937
+ }
938
+ else {
939
+ /* more than 1 optional_scorers, no required scorers */
940
+ return counting_sum_scorer_create2(
941
+ bsc,
942
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
943
+ bsc->optional_scorers,
944
+ bsc->os_cnt, 1),
945
+ NULL, 0); /* no optional scorers left */
946
+ }
947
+ }
948
+ else if (bsc->rs_cnt == 1) {
949
+ /* 1 required */
950
+ return counting_sum_scorer_create2(
951
+ bsc,
952
+ single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
953
+ bsc->optional_scorers, bsc->os_cnt);
954
+ }
955
+ else {
956
+ /* more required scorers */
957
+ return counting_sum_scorer_create2(
958
+ bsc,
959
+ counting_conjunction_sum_scorer_new(bsc->coordinator,
960
+ bsc->required_scorers,
961
+ bsc->rs_cnt),
962
+ bsc->optional_scorers, bsc->os_cnt);
963
+ }
964
+ }
965
+
966
+ static Scorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
967
+ {
968
+ coord_init(bsc->coordinator);
969
+ return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
970
+ }
971
+
972
+ static void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
973
+ {
974
+ BooleanScorer *bsc = BSc(self);
975
+ if (occur != BC_MUST_NOT) {
976
+ bsc->coordinator->max_coord++;
977
+ }
978
+
979
+ switch (occur) {
980
+ case BC_MUST:
981
+ RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
982
+ bsc->required_scorers[bsc->rs_cnt++] = scorer;
983
+ break;
984
+ case BC_SHOULD:
985
+ RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
986
+ bsc->optional_scorers[bsc->os_cnt++] = scorer;
987
+ break;
988
+ case BC_MUST_NOT:
989
+ RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
990
+ bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
991
+ break;
992
+ default:
993
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
994
+ ":must_not instead");
995
+ }
996
+ }
997
+
998
+ static float bsc_score(Scorer *self)
999
+ {
1000
+ BooleanScorer *bsc = BSc(self);
1001
+ Coordinator *coord = bsc->coordinator;
1002
+ float sum;
1003
+ coord->num_matches = 0;
1004
+ sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
1005
+ return sum * coord->coord_factors[coord->num_matches];
1006
+ }
1007
+
1008
+ static bool bsc_next(Scorer *self)
1009
+ {
1010
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1011
+
1012
+ if (!cnt_sum_sc) {
1013
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1014
+ }
1015
+ if (cnt_sum_sc->next(cnt_sum_sc)) {
1016
+ self->doc = cnt_sum_sc->doc;
1017
+ return true;
1018
+ }
1019
+ else {
1020
+ return false;
1021
+ }
1022
+ }
1023
+
1024
+ static bool bsc_skip_to(Scorer *self, int doc_num)
1025
+ {
1026
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1027
+
1028
+ if (!BSc(self)->counting_sum_scorer) {
1029
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1030
+ }
1031
+ if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
1032
+ self->doc = cnt_sum_sc->doc;
1033
+ return true;
1034
+ }
1035
+ else {
1036
+ return false;
1037
+ }
1038
+ }
1039
+
1040
+ static void bsc_destroy(Scorer *self)
1041
+ {
1042
+ BooleanScorer *bsc = BSc(self);
1043
+ Coordinator *coord = bsc->coordinator;
1044
+
1045
+ free(coord->coord_factors);
1046
+ free(coord);
1047
+
1048
+ if (bsc->counting_sum_scorer) {
1049
+ bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1050
+ }
1051
+ else {
1052
+ int i;
1053
+ for (i = 0; i < bsc->rs_cnt; i++) {
1054
+ bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1055
+ }
1056
+
1057
+ for (i = 0; i < bsc->os_cnt; i++) {
1058
+ bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1059
+ }
1060
+
1061
+ for (i = 0; i < bsc->ps_cnt; i++) {
1062
+ bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1063
+ }
1064
+ }
1065
+ free(bsc->required_scorers);
1066
+ free(bsc->optional_scorers);
1067
+ free(bsc->prohibited_scorers);
1068
+ scorer_destroy_i(self);
1069
+ }
1070
+
1071
+ static Explanation *bsc_explain(Scorer *self, int doc_num)
1072
+ {
1073
+ (void)self; (void)doc_num;
1074
+ return expl_new(0.0, "This explanation is not supported");
1075
+ }
1076
+
1077
+ static Scorer *bsc_new(Similarity *similarity)
1078
+ {
1079
+ Scorer *self = scorer_new(BooleanScorer, similarity);
1080
+ BSc(self)->coordinator = coord_new(similarity);
1081
+ BSc(self)->counting_sum_scorer = NULL;
1082
+
1083
+ self->score = &bsc_score;
1084
+ self->next = &bsc_next;
1085
+ self->skip_to = &bsc_skip_to;
1086
+ self->explain = &bsc_explain;
1087
+ self->destroy = &bsc_destroy;
1088
+ return self;
1089
+ }
1090
+
1091
+ /***************************************************************************
1092
+ *
1093
+ * BooleanWeight
1094
+ *
1095
+ ***************************************************************************/
1096
+
1097
+ typedef struct BooleanWeight
1098
+ {
1099
+ Weight w;
1100
+ Weight **weights;
1101
+ int w_cnt;
1102
+ } BooleanWeight;
1103
+
1104
+
1105
+ static float bw_sum_of_squared_weights(Weight *self)
1106
+ {
1107
+ BooleanQuery *bq = BQ(self->query);
1108
+ float sum = 0.0;
1109
+ int i;
1110
+
1111
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1112
+ if (! bq->clauses[i]->is_prohibited) {
1113
+ Weight *weight = BW(self)->weights[i];
1114
+ /* sum sub-weights */
1115
+ sum += weight->sum_of_squared_weights(weight);
1116
+ }
1117
+ }
1118
+
1119
+ /* boost each sub-weight */
1120
+ sum *= self->value * self->value;
1121
+ return sum;
1122
+ }
1123
+
1124
+ static void bw_normalize(Weight *self, float normalization_factor)
1125
+ {
1126
+ BooleanQuery *bq = BQ(self->query);
1127
+ int i;
1128
+
1129
+ normalization_factor *= self->value; /* multiply by query boost */
1130
+
1131
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1132
+ if (! bq->clauses[i]->is_prohibited) {
1133
+ Weight *weight = BW(self)->weights[i];
1134
+ /* sum sub-weights */
1135
+ weight->normalize(weight, normalization_factor);
1136
+ }
1137
+ }
1138
+ }
1139
+
1140
+ static Scorer *bw_scorer(Weight *self, IndexReader *ir)
1141
+ {
1142
+ Scorer *bsc = bsc_new(self->similarity);
1143
+ BooleanQuery *bq = BQ(self->query);
1144
+ int i;
1145
+
1146
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1147
+ BooleanClause *clause = bq->clauses[i];
1148
+ Weight *weight = BW(self)->weights[i];
1149
+ Scorer *sub_scorer = weight->scorer(weight, ir);
1150
+ if (sub_scorer) {
1151
+ bsc_add_scorer(bsc, sub_scorer, clause->occur);
1152
+ }
1153
+ else if (clause->is_required) {
1154
+ bsc->destroy(bsc);
1155
+ return NULL;
1156
+ }
1157
+ }
1158
+
1159
+ return bsc;
1160
+ }
1161
+
1162
+ static char *bw_to_s(Weight *self)
1163
+ {
1164
+ return strfmt("BooleanWeight(%f)", self->value);
1165
+ }
1166
+
1167
+ static void bw_destroy(Weight *self)
1168
+ {
1169
+ int i;
1170
+
1171
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1172
+ BW(self)->weights[i]->destroy(BW(self)->weights[i]);
1173
+ }
1174
+
1175
+ free(BW(self)->weights);
1176
+ w_destroy(self);
1177
+ }
1178
+
1179
+ static Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
1180
+ {
1181
+ BooleanQuery *bq = BQ(self->query);
1182
+ Explanation *sum_expl = expl_new(0.0, "sum of:");
1183
+ Explanation *explanation;
1184
+ int coord = 0;
1185
+ int max_coord = 0;
1186
+ float coord_factor = 0.0;
1187
+ float sum = 0.0;
1188
+ int i;
1189
+
1190
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1191
+ Weight *weight = BW(self)->weights[i];
1192
+ BooleanClause *clause = bq->clauses[i];
1193
+ explanation = weight->explain(weight, ir, doc_num);
1194
+ if (!clause->is_prohibited) {
1195
+ max_coord++;
1196
+ }
1197
+ if (explanation->value > 0.0) {
1198
+ if (!clause->is_prohibited) {
1199
+ expl_add_detail(sum_expl, explanation);
1200
+ sum += explanation->value;
1201
+ coord++;
1202
+ }
1203
+ else {
1204
+ expl_destroy(explanation);
1205
+ expl_destroy(sum_expl);
1206
+ return expl_new(0.0, "match prohibited");
1207
+ }
1208
+ }
1209
+ else if (clause->is_required) {
1210
+ expl_destroy(explanation);
1211
+ expl_destroy(sum_expl);
1212
+ return expl_new(0.0, "match required");
1213
+ }
1214
+ else {
1215
+ expl_destroy(explanation);
1216
+ }
1217
+ }
1218
+ sum_expl->value = sum;
1219
+
1220
+ if (coord == 1) { /* only one clause matched */
1221
+ explanation = sum_expl; /* eliminate wrapper */
1222
+ ary_size(sum_expl->details) = 0;
1223
+ sum_expl = sum_expl->details[0];
1224
+ expl_destroy(explanation);
1225
+ }
1226
+
1227
+ coord_factor = sim_coord(self->similarity, coord, max_coord);
1228
+
1229
+ if (coord_factor == 1.0) { /* coord is no-op */
1230
+ return sum_expl; /* eliminate wrapper */
1231
+ }
1232
+ else {
1233
+ explanation = expl_new(sum * coord_factor, "product of:");
1234
+ expl_add_detail(explanation, sum_expl);
1235
+ expl_add_detail(explanation, expl_new(coord_factor, "coord(%d/%d)",
1236
+ coord, max_coord));
1237
+ return explanation;
1238
+ }
1239
+ }
1240
+
1241
+ static Weight *bw_new(Query *query, Searcher *searcher)
1242
+ {
1243
+ int i;
1244
+ Weight *self = w_new(BooleanWeight, query);
1245
+
1246
+ BW(self)->w_cnt = BQ(query)->clause_cnt;
1247
+ BW(self)->weights = ALLOC_N(Weight *, BW(self)->w_cnt);
1248
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1249
+ BW(self)->weights[i] = q_weight(BQ(query)->clauses[i]->query, searcher);
1250
+ }
1251
+
1252
+ self->normalize = &bw_normalize;
1253
+ self->scorer = &bw_scorer;
1254
+ self->explain = &bw_explain;
1255
+ self->to_s = &bw_to_s;
1256
+ self->destroy = &bw_destroy;
1257
+ self->sum_of_squared_weights = &bw_sum_of_squared_weights;
1258
+
1259
+ self->similarity = query->get_similarity(query, searcher);
1260
+ self->value = query->boost;
1261
+
1262
+ return self;
1263
+ }
1264
+
1265
+ /***************************************************************************
1266
+ *
1267
+ * BooleanClause
1268
+ *
1269
+ ***************************************************************************/
1270
+
1271
+ void bc_set_occur(BooleanClause *self, BCType occur)
1272
+ {
1273
+ self->occur = occur;
1274
+ switch (occur) {
1275
+ case BC_SHOULD:
1276
+ self->is_prohibited = false;
1277
+ self->is_required = false;
1278
+ break;
1279
+ case BC_MUST:
1280
+ self->is_prohibited = false;
1281
+ self->is_required = true;
1282
+ break;
1283
+ case BC_MUST_NOT:
1284
+ self->is_prohibited = true;
1285
+ self->is_required = false;
1286
+ break;
1287
+ default:
1288
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
1289
+ ":must or :must_not instead");
1290
+ }
1291
+ }
1292
+
1293
+ void bc_deref(BooleanClause *self)
1294
+ {
1295
+ if (--self->ref_cnt <= 0) {
1296
+ q_deref(self->query);
1297
+ free(self);
1298
+ }
1299
+ }
1300
+
1301
+ static unsigned long bc_hash(BooleanClause *self)
1302
+ {
1303
+ return ((q_hash(self->query) << 2) | self->occur);
1304
+ }
1305
+
1306
+ static int bc_eq(BooleanClause *self, BooleanClause *o)
1307
+ {
1308
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
1309
+ }
1310
+
1311
+ BooleanClause *bc_new(Query *query, BCType occur)
1312
+ {
1313
+ BooleanClause *self = ALLOC(BooleanClause);
1314
+ self->ref_cnt = 1;
1315
+ self->query = query;
1316
+ bc_set_occur(self, occur);
1317
+ return self;
1318
+ }
1319
+
1320
+ /***************************************************************************
1321
+ *
1322
+ * BooleanQuery
1323
+ *
1324
+ ***************************************************************************/
1325
+
1326
+ static MatchVector *bq_get_matchv_i(Query *self, MatchVector *mv,
1327
+ TermVector *tv)
1328
+ {
1329
+ int i;
1330
+ for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
1331
+ if (BQ(self)->clauses[i]->occur != BC_MUST_NOT) {
1332
+ Query *q = BQ(self)->clauses[i]->query;
1333
+ q->get_matchv_i(q, mv, tv);
1334
+ }
1335
+ }
1336
+ return mv;
1337
+ }
1338
+
1339
+ static Query *bq_rewrite(Query *self, IndexReader *ir)
1340
+ {
1341
+ int i;
1342
+ const int clause_cnt = BQ(self)->clause_cnt;
1343
+ bool rewritten = false;
1344
+ bool has_non_prohibited_clause = false;
1345
+
1346
+ if (clause_cnt == 1) {
1347
+ /* optimize 1-clause queries */
1348
+ BooleanClause *clause = BQ(self)->clauses[0];
1349
+ if (! clause->is_prohibited) {
1350
+ /* just return clause. Re-write first. */
1351
+ Query *q = clause->query->rewrite(clause->query, ir);
1352
+
1353
+ if (self->boost != 1.0) {
1354
+ /* original_boost is initialized to 0.0. If it has been set to
1355
+ * something else it means this query has already been boosted
1356
+ * before so boost from the original value */
1357
+ if ((q == clause->query) && BQ(self)->original_boost) {
1358
+ /* rewrite was no-op */
1359
+ q->boost = BQ(self)->original_boost * self->boost;
1360
+ }
1361
+ else {
1362
+ /* save original boost in case query is rewritten again */
1363
+ BQ(self)->original_boost = q->boost;
1364
+ q->boost *= self->boost;
1365
+ }
1366
+ }
1367
+
1368
+ return q;
1369
+ }
1370
+ }
1371
+
1372
+ self->ref_cnt++;
1373
+ /* replace each clause's query with its rewritten query */
1374
+ for (i = 0; i < clause_cnt; i++) {
1375
+ BooleanClause *clause = BQ(self)->clauses[i];
1376
+ Query *rq = clause->query->rewrite(clause->query, ir);
1377
+ /* check for at least one non-prohibited clause */
1378
+ if (clause->is_prohibited == false) has_non_prohibited_clause = true;
1379
+ if (rq != clause->query) {
1380
+ if (!rewritten) {
1381
+ int j;
1382
+ Query *new_self = q_new(BooleanQuery);
1383
+ memcpy(new_self, self, sizeof(BooleanQuery));
1384
+ BQ(new_self)->clauses = ALLOC_N(BooleanClause *,
1385
+ BQ(self)->clause_capa);
1386
+ memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
1387
+ BQ(self)->clause_capa * sizeof(BooleanClause *));
1388
+ for (j = 0; j < clause_cnt; j++) {
1389
+ REF(BQ(self)->clauses[j]);
1390
+ }
1391
+ self->ref_cnt--;
1392
+ self = new_self;
1393
+ self->ref_cnt = 1;
1394
+ rewritten = true;
1395
+ }
1396
+ DEREF(clause);
1397
+ BQ(self)->clauses[i] = bc_new(rq, clause->occur);
1398
+ } else {
1399
+ DEREF(rq);
1400
+ }
1401
+ }
1402
+ if (clause_cnt > 0 && !has_non_prohibited_clause) {
1403
+ bq_add_query_nr(self, maq_new(), BC_MUST);
1404
+ }
1405
+
1406
+ return self;
1407
+ }
1408
+
1409
+ static void bq_extract_terms(Query *self, HashSet *terms)
1410
+ {
1411
+ int i;
1412
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1413
+ BooleanClause *clause = BQ(self)->clauses[i];
1414
+ clause->query->extract_terms(clause->query, terms);
1415
+ }
1416
+ }
1417
+
1418
+ static char *bq_to_s(Query *self, Symbol field)
1419
+ {
1420
+ int i;
1421
+ BooleanClause *clause;
1422
+ Query *sub_query;
1423
+ char *buffer;
1424
+ char *clause_str;
1425
+ int bp = 0;
1426
+ int size = QUERY_STRING_START_SIZE;
1427
+ int needed;
1428
+ int clause_len;
1429
+
1430
+ buffer = ALLOC_N(char, size);
1431
+ if (self->boost != 1.0) {
1432
+ buffer[0] = '(';
1433
+ bp++;
1434
+ }
1435
+
1436
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1437
+ clause = BQ(self)->clauses[i];
1438
+ clause_str = clause->query->to_s(clause->query, field);
1439
+ clause_len = (int)strlen(clause_str);
1440
+ needed = clause_len + 5;
1441
+ while ((size - bp) < needed) {
1442
+ size *= 2;
1443
+ REALLOC_N(buffer, char, size);
1444
+ }
1445
+
1446
+ if (i > 0) {
1447
+ buffer[bp++] = ' ';
1448
+ }
1449
+ if (clause->is_prohibited) {
1450
+ buffer[bp++] = '-';
1451
+ }
1452
+ else if (clause->is_required) {
1453
+ buffer[bp++] = '+';
1454
+ }
1455
+
1456
+ sub_query = clause->query;
1457
+ if (sub_query->type == BOOLEAN_QUERY) {
1458
+ /* wrap sub-bools in parens */
1459
+ buffer[bp++] = '(';
1460
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1461
+ bp += clause_len;
1462
+ buffer[bp++] = ')';
1463
+ }
1464
+ else {
1465
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1466
+ bp += clause_len;
1467
+ }
1468
+ free(clause_str);
1469
+ }
1470
+
1471
+ if (self->boost != 1.0) {
1472
+ char *boost_str = strfmt(")^%f", self->boost);
1473
+ int boost_len = (int)strlen(boost_str);
1474
+ REALLOC_N(buffer, char, bp + boost_len + 1);
1475
+ memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
1476
+ bp += boost_len;
1477
+ free(boost_str);
1478
+ }
1479
+ buffer[bp] = 0;
1480
+ return buffer;
1481
+ }
1482
+
1483
+ static void bq_destroy(Query *self)
1484
+ {
1485
+ int i;
1486
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1487
+ bc_deref(BQ(self)->clauses[i]);
1488
+ }
1489
+ free(BQ(self)->clauses);
1490
+ if (BQ(self)->similarity) {
1491
+ BQ(self)->similarity->destroy(BQ(self)->similarity);
1492
+ }
1493
+ q_destroy_i(self);
1494
+ }
1495
+
1496
+ static float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
1497
+ {
1498
+ (void)sim; (void)overlap; (void)max_overlap;
1499
+ return 1.0;
1500
+ }
1501
+
1502
+ static Similarity *bq_get_similarity(Query *self, Searcher *searcher)
1503
+ {
1504
+ if (!BQ(self)->similarity) {
1505
+ Similarity *sim = q_get_similarity_i(self, searcher);
1506
+ BQ(self)->similarity = ALLOC(Similarity);
1507
+ memcpy(BQ(self)->similarity, sim, sizeof(Similarity));
1508
+ BQ(self)->similarity->coord = &bq_coord_disabled;
1509
+ BQ(self)->similarity->destroy = (void (*)(Similarity *))&free;
1510
+ }
1511
+
1512
+ return BQ(self)->similarity;
1513
+ }
1514
+
1515
+ static unsigned long bq_hash(Query *self)
1516
+ {
1517
+ int i;
1518
+ unsigned long hash = 0;
1519
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1520
+ hash ^= bc_hash(BQ(self)->clauses[i]);
1521
+ }
1522
+ return (hash << 1) | BQ(self)->coord_disabled;
1523
+ }
1524
+
1525
+ static int bq_eq(Query *self, Query *o)
1526
+ {
1527
+ int i;
1528
+ BooleanQuery *bq1 = BQ(self);
1529
+ BooleanQuery *bq2 = BQ(o);
1530
+ if ((bq1->coord_disabled != bq2->coord_disabled)
1531
+ || (bq1->max_clause_cnt != bq1->max_clause_cnt)
1532
+ || (bq1->clause_cnt != bq2->clause_cnt)) {
1533
+ return false;
1534
+ }
1535
+
1536
+ for (i = 0; i < bq1->clause_cnt; i++) {
1537
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
1538
+ return false;
1539
+ }
1540
+ }
1541
+ return true;
1542
+ }
1543
+
1544
+ Query *bq_new(bool coord_disabled)
1545
+ {
1546
+ Query *self = q_new(BooleanQuery);
1547
+ BQ(self)->coord_disabled = coord_disabled;
1548
+ if (coord_disabled) {
1549
+ self->get_similarity = &bq_get_similarity;
1550
+ }
1551
+ BQ(self)->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
1552
+ BQ(self)->clause_cnt = 0;
1553
+ BQ(self)->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
1554
+ BQ(self)->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
1555
+ BQ(self)->similarity = NULL;
1556
+ BQ(self)->original_boost = 0.0;
1557
+
1558
+ self->type = BOOLEAN_QUERY;
1559
+ self->rewrite = &bq_rewrite;
1560
+ self->extract_terms = &bq_extract_terms;
1561
+ self->to_s = &bq_to_s;
1562
+ self->hash = &bq_hash;
1563
+ self->eq = &bq_eq;
1564
+ self->destroy_i = &bq_destroy;
1565
+ self->create_weight_i = &bw_new;
1566
+ self->get_matchv_i = &bq_get_matchv_i;
1567
+
1568
+ return self;
1569
+ }
1570
+
1571
+ Query *bq_new_max(bool coord_disabled, int max)
1572
+ {
1573
+ Query *q = bq_new(coord_disabled);
1574
+ BQ(q)->max_clause_cnt = max;
1575
+ return q;
1576
+ }
1577
+
1578
+ BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
1579
+ {
1580
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1581
+ RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1582
+ "<%d> but your query has <%d> clauses. You can try increasing "
1583
+ ":max_clause_count for the BooleanQuery or using a different "
1584
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1585
+ }
1586
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1587
+ BQ(self)->clause_capa *= 2;
1588
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1589
+ }
1590
+ BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1591
+ BQ(self)->clause_cnt++;
1592
+ return bc;
1593
+ }
1594
+
1595
+ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
1596
+ {
1597
+ REF(bc);
1598
+ return bq_add_clause_nr(self, bc);
1599
+ }
1600
+
1601
+ BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, BCType occur)
1602
+ {
1603
+ BooleanClause *bc;
1604
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1605
+ RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1606
+ "<%d> but your query has <%d> clauses. You can try increasing "
1607
+ ":max_clause_count for the BooleanQuery or using a different "
1608
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1609
+ }
1610
+ bc = bc_new(sub_query, occur);
1611
+ bq_add_clause(self, bc);
1612
+ bc_deref(bc); /* bc was referenced unnecessarily */
1613
+ return bc;
1614
+ }
1615
+
1616
+ BooleanClause *bq_add_query(Query *self, Query *sub_query, BCType occur)
1617
+ {
1618
+ REF(sub_query);
1619
+ return bq_add_query_nr(self, sub_query, occur);
1620
+ }
1621
+