sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,151 @@
1
+ #include <string.h>
2
+ #include "priorityqueue.h"
3
+
4
+ #define START_CAPA 127
5
+
6
+ PriorityQueue *pq_new(int capa,
7
+ bool (*less_than)(const void *p1, const void *p2),
8
+ void (*free_elem)(void *elem))
9
+ {
10
+ PriorityQueue *pq = ALLOC(PriorityQueue);
11
+ pq->size = 0;
12
+ pq->capa = capa;
13
+ pq->mem_capa = (START_CAPA > capa ? capa : START_CAPA) + 1;
14
+ pq->heap = ALLOC_N(void *, pq->mem_capa);
15
+ pq->less_than_i = less_than;
16
+
17
+ /* need to set this yourself if you want to change it */
18
+ pq->free_elem_i = free_elem ? free_elem : &dummy_free;
19
+ return pq;
20
+ }
21
+
22
+ PriorityQueue *pq_clone(PriorityQueue *pq)
23
+ {
24
+ PriorityQueue *new_pq = ALLOC(PriorityQueue);
25
+ memcpy(new_pq, pq, sizeof(PriorityQueue));
26
+ new_pq->heap = ALLOC_N(void *, new_pq->mem_capa);
27
+ memcpy(new_pq->heap, pq->heap, sizeof(void *) * (new_pq->size + 1));
28
+
29
+ return new_pq;
30
+ }
31
+
32
+ void pq_clear(PriorityQueue *pq)
33
+ {
34
+ int i;
35
+ for (i = 1; i <= pq->size; i++) {
36
+ pq->free_elem_i(pq->heap[i]);
37
+ pq->heap[i] = NULL;
38
+ }
39
+ pq->size = 0;
40
+ }
41
+
42
+ void pq_free(PriorityQueue *pq)
43
+ {
44
+ free(pq->heap);
45
+ free(pq);
46
+ }
47
+
48
+ void pq_destroy(PriorityQueue *pq)
49
+ {
50
+ pq_clear(pq);
51
+ pq_free(pq);
52
+ }
53
+
54
+ /**
55
+ * This method is used internally by pq_push. It is similar to pq_down except
56
+ * that where pq_down reorders the elements from the top, pq_up reorders from
57
+ * the bottom.
58
+ *
59
+ * @param pq the PriorityQueue to reorder
60
+ */
61
+ static void pq_up(PriorityQueue *pq)
62
+ {
63
+ void **heap = pq->heap;
64
+ void *node;
65
+ int i = pq->size;
66
+ int j = i >> 1;
67
+
68
+ node = heap[i];
69
+
70
+ while ((j > 0) && pq->less_than_i(node, heap[j])) {
71
+ heap[i] = heap[j];
72
+ i = j;
73
+ j = j >> 1;
74
+ }
75
+ heap[i] = node;
76
+ }
77
+
78
+ void pq_down(PriorityQueue *pq)
79
+ {
80
+ register int i = 1;
81
+ register int j = 2; /* i << 1; */
82
+ register int k = 3; /* j + 1; */
83
+ register int size = pq->size;
84
+ void **heap = pq->heap;
85
+ void *node = heap[i]; /* save top node */
86
+
87
+ if ((k <= size) && (pq->less_than_i(heap[k], heap[j]))) {
88
+ j = k;
89
+ }
90
+
91
+ while ((j <= size) && pq->less_than_i(heap[j], node)) {
92
+ heap[i] = heap[j]; /* shift up child */
93
+ i = j;
94
+ j = i << 1;
95
+ k = j + 1;
96
+ if ((k <= size) && pq->less_than_i(heap[k], heap[j])) {
97
+ j = k;
98
+ }
99
+ }
100
+ heap[i] = node;
101
+ }
102
+
103
+ void pq_push(PriorityQueue *pq, void *elem)
104
+ {
105
+ pq->size++;
106
+ if (pq->size >= pq->mem_capa) {
107
+ pq->mem_capa <<= 1;
108
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
109
+ }
110
+ pq->heap[pq->size] = elem;
111
+ pq_up(pq);
112
+ }
113
+
114
+ int pq_insert(PriorityQueue *pq, void *elem)
115
+ {
116
+ if (pq->size < pq->capa) {
117
+ pq_push(pq, elem);
118
+ return PQ_ADDED;
119
+ }
120
+ else if (pq->size > 0 && pq->less_than_i(pq->heap[1], elem)) {
121
+ pq->free_elem_i(pq->heap[1]);
122
+ pq->heap[1] = elem;
123
+ pq_down(pq);
124
+ return PQ_INSERTED;
125
+ }
126
+ else {
127
+ pq->free_elem_i(elem);
128
+ return PQ_DROPPED;
129
+ }
130
+ }
131
+
132
+ void *pq_top(PriorityQueue *pq)
133
+ {
134
+ return pq->size ? pq->heap[1] : NULL;
135
+ }
136
+
137
+ void *pq_pop(PriorityQueue *pq)
138
+ {
139
+ if (pq->size > 0) {
140
+ void *result = pq->heap[1]; /* save first value */
141
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
142
+ pq->heap[pq->size] = NULL;
143
+ pq->size--;
144
+ pq_down(pq); /* adjust heap */
145
+ return result;
146
+ }
147
+ else {
148
+ return NULL;
149
+ }
150
+ }
151
+
@@ -0,0 +1,143 @@
1
+ #ifndef FRT_PRIORITYQUEUE_H
2
+ #define FRT_PRIORITYQUEUE_H
3
+
4
+ #include "global.h"
5
+
6
+ typedef bool(*lt_ft) (const void *p1, const void *p2);
7
+
8
+ /**
9
+ * A PriorityQueue has a fixed size and contains a less_than function and a
10
+ * free_elem function specific to the data type to be stored in the queue.
11
+ */
12
+ typedef struct PriorityQueue
13
+ {
14
+ int size; /* number of elements currently stored (at heap[1..size]) */
15
+ int capa; /* maximum number of elements pq_insert will keep */
16
+ int mem_capa; /* number of slots allocated for heap; doubled by pq_push when needed */
17
+ void **heap; /* element pointers; heap[1] is the top */
18
+ lt_ft less_than_i; /* ordering predicate used to arrange the heap */
19
+ free_ft free_elem_i; /* called on elements released by pq_clear and pq_insert */
20
+ } PriorityQueue;
21
+
22
+ /**
23
+ * Create a new PriorityQueue setting the less_than and free_elem for this
24
+ * specific PriorityQueue.
25
+ *
26
+ * @param capa the capacity of the PriorityQueue. As more than the capacity is
27
+ * added to the queue the least valued elements drop off the bottom.
28
+ * @param less_than the function to determine whether one value is less than
29
+ * another for this particular PriorityQueue
30
+ * @param free_elem the function to free the elements in the PriorityQueue
31
+ * when it is destroyed or there is insertion overflow
32
+ * @return a newly allocated PriorityQueue
33
+ */
34
+ extern PriorityQueue *pq_new(int capa,
35
+ bool (*less_than)(const void *p1, const void *p2),
36
+ void (*free_elem)(void *elem));
37
+
38
+ /**
39
+ * Allocate a clone of the PriorityQueue. This can be used if you want to scan
40
+ * through all elements of the PriorityQueue but you don't want to have to
41
+ * remove them all and add them all again.
42
+ *
43
+ * @param pq the priority queue to clone
44
+ * @return a clone of the original priority queue
45
+ */
46
+ extern PriorityQueue *pq_clone(PriorityQueue *pq);
47
+
48
+ /**
49
+ * Clear all elements from the PriorityQueue and reset the size to 0. When
50
+ * the elements are removed from the PriorityQueue, free_elem is used to free
51
+ * them, unless it was set to NULL in which case nothing will happen to them.
52
+ *
53
+ * @param self the PriorityQueue to clear
54
+ */
55
+ extern void pq_clear(PriorityQueue *self);
56
+
57
+ /**
58
+ * Free the memory allocated to the PriorityQueue. This function does nothing
59
+ * to the elements in the PriorityQueue itself. To destroy them also, use
60
+ * pq_destroy.
61
+ *
62
+ * @param self the PriorityQueue to free
63
+ */
64
+ extern void pq_free(PriorityQueue *self);
65
+
66
+ /**
67
+ * Destroy the PriorityQueue, freeing all memory allocated to it and also
68
+ * destroying all the elements contained by it. This method is equivalent to
69
+ * calling pq_clear followed by pq_free.
70
+ *
71
+ * @param self the PriorityQueue to destroy
72
+ */
73
+ extern void pq_destroy(PriorityQueue *self);
74
+
75
+ /**
76
+ * Reorder the PriorityQueue after the top element has been modified. This
77
+ * method is used especially when the PriorityQueue contains a queue of
78
+ * iterators. When the top iterator is incremented you should call this
79
+ * method.
80
+ *
81
+ * @param self the PriorityQueue to reorder
82
+ */
83
+ extern void pq_down(PriorityQueue *self);
84
+
85
+ /**
86
+ * Add another element to the PriorityQueue. This method should only be used
87
+ * when the PriorityQueue has enough space allocated to hold all elements
88
+ * added. If there is a chance that you will add more than the amount you have
89
+ * allocated then you should use pq_insert. pq_insert will handle insertion
90
+ * overflow.
91
+ *
92
+ * @param self the PriorityQueue to add the element to
93
+ * @param elem the element to add to the PriorityQueue
94
+ */
95
+ extern void pq_push(PriorityQueue *self, void *elem);
96
+
97
+ #define PQ_DROPPED 0
98
+ #define PQ_ADDED 1
99
+ #define PQ_INSERTED 2
100
+ /**
101
+ * Add another element to the PriorityQueue. Unlike pq_push, this method
102
+ * handles insertion overflow. That is, when you insert more elements than the
103
+ * capacity of the PriorityQueue, the elements are dropped off the bottom and
104
+ * freed using the free_elem function.
105
+ *
106
+ * @param self the PriorityQueue to add the element to
107
+ * @param elem the element to add to the PriorityQueue
108
+ * @returns one of three values;
109
+ * <pre>
110
+ * 0 == PQ_DROPPED the element was too small (according to the less_than
111
+ * function) so it was destroyed
112
+ * 1 == PQ_ADDED the element was successfully added
113
+ * 2 == PQ_INSERTED the element was successfully added after another
114
+ * element was dropped and destroyed
115
+ * </pre>
116
+ */
117
+ extern int pq_insert(PriorityQueue *self, void *elem);
118
+
119
+ /**
120
+ * Get the top element in the PriorityQueue.
121
+ *
122
+ * @param self the PriorityQueue to get the top from
123
+ * @return the top element in the PriorityQueue
124
+ */
125
+ extern void *pq_top(PriorityQueue *self);
126
+
127
+ /**
128
+ * Remove and return the top element in the PriorityQueue.
129
+ *
130
+ * @param self the PriorityQueue to get the top from
131
+ * @return the top element in the PriorityQueue
132
+ */
133
+ extern void *pq_pop(PriorityQueue *self);
134
+
135
+ /**
136
+ * Return true if the PriorityQueue is full.
137
+ *
138
+ * @param self the PriorityQueue to test
139
+ * @return true if the PriorityQueue is full.
140
+ */
141
+ #define pq_full(pq) ((pq)->size == (pq)->capa)
142
+
143
+ #endif
@@ -0,0 +1,1608 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+ #include "array.h"
4
+
5
+ #define BQ(query) ((BooleanQuery *)(query))
6
+ #define BW(weight) ((BooleanWeight *)(weight))
7
+
8
+ /***************************************************************************
9
+ *
10
+ * BooleanScorer
11
+ *
12
+ ***************************************************************************/
13
+
14
+ /***************************************************************************
15
+ * Coordinator
16
+ ***************************************************************************/
17
+
18
+ typedef struct Coordinator
19
+ {
20
+ int max_coord; /* highest match count; coord_factors holds max_coord + 1 entries */
21
+ float *coord_factors; /* coord_factors[i] = sim_coord(similarity, i, max_coord), filled by coord_init */
22
+ Similarity *similarity; /* similarity passed to sim_coord */
23
+ int num_matches; /* running match count, increased by the counting scorers' score functions */
24
+ } Coordinator;
25
+
26
+ static Coordinator *coord_new(Similarity *similarity)
27
+ {
28
+ Coordinator *self = ALLOC_AND_ZERO(Coordinator);
29
+ self->similarity = similarity;
30
+ return self;
31
+ }
32
+
33
+ static Coordinator *coord_init(Coordinator *self)
34
+ {
35
+ int i;
36
+ self->coord_factors = ALLOC_N(float, self->max_coord + 1);
37
+
38
+ for (i = 0; i <= self->max_coord; i++) {
39
+ self->coord_factors[i]
40
+ = sim_coord(self->similarity, i, self->max_coord);
41
+ }
42
+
43
+ return self;
44
+ }
45
+
46
+ /***************************************************************************
47
+ * DisjunctionSumScorer
48
+ ***************************************************************************/
49
+
50
+ #define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
51
+
52
+ typedef struct DisjunctionSumScorer
53
+ {
54
+ Scorer super; /* base scorer; must stay first so the DSSc() cast works */
55
+ float cum_score; /* sum of the scores of the sub-scorers on the current doc */
56
+ int num_matches; /* number of sub-scorers positioned on the current doc */
57
+ int min_num_matches; /* matches required before a doc is reported */
58
+ Scorer **sub_scorers; /* the ss_cnt sub-scorers being combined */
59
+ int ss_cnt; /* number of entries in sub_scorers */
60
+ PriorityQueue *scorer_queue; /* sub-scorers ordered by doc; NULL until the first next/skip_to */
61
+ Coordinator *coordinator; /* NULL unless built by counting_disjunction_sum_scorer_new */
62
+ } DisjunctionSumScorer;
63
+
64
+ static float dssc_score(Scorer *self)
65
+ {
66
+ return DSSc(self)->cum_score;
67
+ }
68
+
69
+ static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
70
+ {
71
+ int i;
72
+ Scorer *sub_scorer;
73
+ PriorityQueue *pq = dssc->scorer_queue
74
+ = pq_new(dssc->ss_cnt, (lt_ft)&scorer_doc_less_than, NULL);
75
+
76
+ for (i = 0; i < dssc->ss_cnt; i++) {
77
+ sub_scorer = dssc->sub_scorers[i];
78
+ if (sub_scorer->next(sub_scorer)) {
79
+ pq_insert(pq, sub_scorer);
80
+ }
81
+ }
82
+ }
83
+
84
+ static bool dssc_advance_after_current(Scorer *self)
85
+ {
86
+ DisjunctionSumScorer *dssc = DSSc(self);
87
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
88
+
89
+ /* repeat until minimum number of matches is found */
90
+ while (true) {
91
+ Scorer *top = (Scorer *)pq_top(scorer_queue);
92
+ self->doc = top->doc;
93
+ dssc->cum_score = top->score(top);
94
+ dssc->num_matches = 1;
95
+ /* Until all sub-scorers are after self->doc */
96
+ while (true) {
97
+ if (top->next(top)) {
98
+ pq_down(scorer_queue);
99
+ }
100
+ else {
101
+ pq_pop(scorer_queue);
102
+ if (scorer_queue->size
103
+ < (dssc->min_num_matches - dssc->num_matches)) {
104
+ /* Not enough subscorers left for a match on this
105
+ * document, also no more chance of any further match */
106
+ return false;
107
+ }
108
+ if (scorer_queue->size == 0) {
109
+ /* nothing more to advance, check for last match. */
110
+ break;
111
+ }
112
+ }
113
+ top = pq_top(scorer_queue);
114
+ if (top->doc != self->doc) {
115
+ /* All remaining subscorers are after self->doc */
116
+ break;
117
+ }
118
+ else {
119
+ dssc->cum_score += top->score(top);
120
+ dssc->num_matches++;
121
+ }
122
+ }
123
+
124
+ if (dssc->num_matches >= dssc->min_num_matches) {
125
+ return true;
126
+ }
127
+ else if (scorer_queue->size < dssc->min_num_matches) {
128
+ return false;
129
+ }
130
+ }
131
+ }
132
+
133
+ static bool dssc_next(Scorer *self)
134
+ {
135
+ if (DSSc(self)->scorer_queue == NULL) {
136
+ dssc_init_scorer_queue(DSSc(self));
137
+ }
138
+
139
+ if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
140
+ return false;
141
+ }
142
+ else {
143
+ return dssc_advance_after_current(self);
144
+ }
145
+ }
146
+
147
+ static bool dssc_skip_to(Scorer *self, int doc_num)
148
+ {
149
+ DisjunctionSumScorer *dssc = DSSc(self);
150
+ PriorityQueue *scorer_queue = dssc->scorer_queue;
151
+
152
+ if (scorer_queue == NULL) {
153
+ dssc_init_scorer_queue(dssc);
154
+ scorer_queue = dssc->scorer_queue;
155
+ }
156
+
157
+ if (scorer_queue->size < dssc->min_num_matches) {
158
+ return false;
159
+ }
160
+ if (doc_num <= self->doc) {
161
+ doc_num = self->doc + 1;
162
+ }
163
+ while (true) {
164
+ Scorer *top = pq_top(scorer_queue);
165
+ if (top->doc >= doc_num) {
166
+ return dssc_advance_after_current(self);
167
+ }
168
+ else if (top->skip_to(top, doc_num)) {
169
+ pq_down(scorer_queue);
170
+ }
171
+ else {
172
+ pq_pop(scorer_queue);
173
+ if (scorer_queue->size < dssc->min_num_matches) {
174
+ return false;
175
+ }
176
+ }
177
+ }
178
+ }
179
+
180
+ static Explanation *dssc_explain(Scorer *self, int doc_num)
181
+ {
182
+ int i;
183
+ DisjunctionSumScorer *dssc = DSSc(self);
184
+ Scorer *sub_scorer;
185
+ Explanation *e
186
+ = expl_new(0.0, "At least %d of:", dssc->min_num_matches);
187
+ for (i = 0; i < dssc->ss_cnt; i++) {
188
+ sub_scorer = dssc->sub_scorers[i];
189
+ expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
190
+ }
191
+ return e;
192
+ }
193
+
194
+ static void dssc_destroy(Scorer *self)
195
+ {
196
+ DisjunctionSumScorer *dssc = DSSc(self);
197
+ int i;
198
+ for (i = 0; i < dssc->ss_cnt; i++) {
199
+ dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
200
+ }
201
+ if (dssc->scorer_queue) {
202
+ pq_destroy(dssc->scorer_queue);
203
+ }
204
+ scorer_destroy_i(self);
205
+ }
206
+
207
+ static Scorer *disjunction_sum_scorer_new(Scorer **sub_scorers, int ss_cnt,
208
+ int min_num_matches)
209
+ {
210
+ Scorer *self = scorer_new(DisjunctionSumScorer, NULL);
211
+ DSSc(self)->ss_cnt = ss_cnt;
212
+
213
+ /* The document number of the current match */
214
+ self->doc = -1;
215
+ DSSc(self)->cum_score = -1.0;
216
+
217
+ /* The number of subscorers that provide the current match. */
218
+ DSSc(self)->num_matches = -1;
219
+ DSSc(self)->coordinator = NULL;
220
+
221
+ #ifdef DEBUG
222
+ if (min_num_matches <= 0) {
223
+ RAISE(ARG_ERROR, "The min_num_matches value <%d> should not be less "
224
+ "than 0\n", min_num_matches);
225
+ }
226
+ if (ss_cnt <= 1) {
227
+ RAISE(ARG_ERROR, "There should be at least 2 sub_scorers in a "
228
+ "DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
229
+ }
230
+ #endif
231
+
232
+ DSSc(self)->min_num_matches = min_num_matches;
233
+ DSSc(self)->sub_scorers = sub_scorers;
234
+ DSSc(self)->scorer_queue = NULL;
235
+
236
+ self->score = &dssc_score;
237
+ self->next = &dssc_next;
238
+ self->skip_to = &dssc_skip_to;
239
+ self->explain = &dssc_explain;
240
+ self->destroy = &dssc_destroy;
241
+
242
+ return self;
243
+ }
244
+
245
+ static float cdssc_score(Scorer *self)
246
+ {
247
+ DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
248
+ return DSSc(self)->cum_score;
249
+ }
250
+
251
+ static Scorer *counting_disjunction_sum_scorer_new(
252
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt,
253
+ int min_num_matches)
254
+ {
255
+ Scorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
256
+ min_num_matches);
257
+ DSSc(self)->coordinator = coordinator;
258
+ self->score = &cdssc_score;
259
+ return self;
260
+ }
261
+
262
+ /***************************************************************************
263
+ * ConjunctionScorer
264
+ ***************************************************************************/
265
+
266
+ #define CSc(scorer) ((ConjunctionScorer *)(scorer))
267
+
268
+ typedef struct ConjunctionScorer
269
+ {
270
+ Scorer super; /* base scorer; must stay first so the CSc() cast works */
271
+ bool first_time : 1; /* true until csc_init has run */
272
+ bool more : 1; /* false once a sub-scorer reported no further documents */
273
+ float coord; /* sim_coord(similarity, ss_cnt, ss_cnt), applied in csc_score */
274
+ Scorer **sub_scorers; /* the ss_cnt scorers that must all match */
275
+ int ss_cnt; /* number of entries in sub_scorers */
276
+ int first_idx; /* index of the scorer csc_do_next treats as first; 0 after sorting */
277
+ Coordinator *coordinator; /* NULL unless built by counting_conjunction_sum_scorer_new */
278
+ int last_scored_doc; /* last doc for which ccsc_score reported matches */
279
+ } ConjunctionScorer;
280
+
281
+ static void csc_sort_scorers(ConjunctionScorer *csc)
282
+ {
283
+ qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
284
+ csc->first_idx = 0;
285
+ }
286
+
287
+ static void csc_init(Scorer *self, bool init_scorers)
288
+ {
289
+ ConjunctionScorer *csc = CSc(self);
290
+ const int sub_sc_cnt = csc->ss_cnt;
291
+
292
+ /* compute coord factor */
293
+ csc->coord = sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
294
+
295
+ csc->more = (sub_sc_cnt > 0);
296
+
297
+ if (init_scorers) {
298
+ int i;
299
+ /* move each scorer to its first entry */
300
+ for (i = 0; i < sub_sc_cnt; i++) {
301
+ Scorer *sub_scorer = csc->sub_scorers[i];
302
+ if (!csc->more) {
303
+ break;
304
+ }
305
+ csc->more = sub_scorer->next(sub_scorer);
306
+ }
307
+ if (csc->more) {
308
+ csc_sort_scorers(csc);
309
+ }
310
+ }
311
+
312
+ csc->first_time = false;
313
+ }
314
+
315
+ static float csc_score(Scorer *self)
316
+ {
317
+ ConjunctionScorer *csc = CSc(self);
318
+ const int sub_sc_cnt = csc->ss_cnt;
319
+ float score = 0.0; /* sum scores */
320
+ int i;
321
+ for (i = 0; i < sub_sc_cnt; i++) {
322
+ Scorer *sub_scorer = csc->sub_scorers[i];
323
+ score += sub_scorer->score(sub_scorer);
324
+ }
325
+ score *= csc->coord;
326
+ return score;
327
+ }
328
+
329
+ static bool csc_do_next(Scorer *self)
330
+ {
331
+ ConjunctionScorer *csc = CSc(self);
332
+ const int sub_sc_cnt = csc->ss_cnt;
333
+ int first_idx = csc->first_idx;
334
+ Scorer *first_sc = csc->sub_scorers[first_idx];
335
+ Scorer *last_sc = csc->sub_scorers[PREV_NUM(first_idx, sub_sc_cnt)];
336
+
337
+ /* skip to doc with all clauses */
338
+ while (csc->more && (first_sc->doc < last_sc->doc)) {
339
+ /* skip first upto last */
340
+ csc->more = first_sc->skip_to(first_sc, last_sc->doc);
341
+ /* move first to last */
342
+ last_sc = first_sc;
343
+ first_idx = NEXT_NUM(first_idx, sub_sc_cnt);
344
+ first_sc = csc->sub_scorers[first_idx];
345
+ }
346
+ self->doc = first_sc->doc;
347
+ csc->first_idx = first_idx;
348
+ return csc->more;
349
+ }
350
+
351
+ static bool csc_next(Scorer *self)
352
+ {
353
+ ConjunctionScorer *csc = CSc(self);
354
+ if (csc->first_time) {
355
+ csc_init(self, true);
356
+ }
357
+ else if (csc->more) {
358
+ /* trigger further scanning */
359
+ const int last_idx = PREV_NUM(csc->first_idx, csc->ss_cnt);
360
+ Scorer *sub_scorer = csc->sub_scorers[last_idx];
361
+ csc->more = sub_scorer->next(sub_scorer);
362
+ }
363
+ return csc_do_next(self);
364
+ }
365
+
366
+ static bool csc_skip_to(Scorer *self, int doc_num)
367
+ {
368
+ ConjunctionScorer *csc = CSc(self);
369
+ const int sub_sc_cnt = csc->ss_cnt;
370
+ int i;
371
+ bool more = csc->more;
372
+
373
+ if (csc->first_time) {
374
+ csc_init(self, true);
375
+ }
376
+
377
+ for (i = 0; i < sub_sc_cnt; i++) {
378
+ if (!more) {
379
+ break;
380
+ }
381
+ else {
382
+ Scorer *sub_scorer = csc->sub_scorers[i];
383
+ more = sub_scorer->skip_to(sub_scorer, doc_num);
384
+ }
385
+ }
386
+ if (more) {
387
+ /* resort the scorers */
388
+ csc_sort_scorers(csc);
389
+ }
390
+
391
+ csc->more = more;
392
+ return csc_do_next(self);
393
+ }
394
+
395
+ static void csc_destroy(Scorer *self)
396
+ {
397
+ ConjunctionScorer *csc = CSc(self);
398
+ const int sub_sc_cnt = csc->ss_cnt;
399
+ int i;
400
+ for (i = 0; i < sub_sc_cnt; i++) {
401
+ csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
402
+ }
403
+ free(csc->sub_scorers);
404
+ scorer_destroy_i(self);
405
+ }
406
+
407
+ static Scorer *conjunction_scorer_new(Similarity *similarity)
408
+ {
409
+ Scorer *self = scorer_new(ConjunctionScorer, similarity);
410
+
411
+ CSc(self)->first_time = true;
412
+ CSc(self)->more = true;
413
+ CSc(self)->coordinator = NULL;
414
+
415
+ self->score = &csc_score;
416
+ self->next = &csc_next;
417
+ self->skip_to = &csc_skip_to;
418
+ self->destroy = &csc_destroy;
419
+
420
+ return self;
421
+ }
422
+
423
+ static float ccsc_score(Scorer *self)
424
+ {
425
+ ConjunctionScorer *csc = CSc(self);
426
+
427
+ int doc;
428
+ if ((doc = self->doc) > csc->last_scored_doc) {
429
+ csc->last_scored_doc = doc;
430
+ csc->coordinator->num_matches += csc->ss_cnt;
431
+ }
432
+
433
+ return csc_score(self);
434
+ }
435
+
436
+ static Scorer *counting_conjunction_sum_scorer_new(
437
+ Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt)
438
+ {
439
+ Scorer *self = conjunction_scorer_new(sim_create_default());
440
+ ConjunctionScorer *csc = CSc(self);
441
+ csc->coordinator = coordinator;
442
+ csc->last_scored_doc = -1;
443
+ csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
444
+ memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
445
+ csc->ss_cnt = ss_cnt;
446
+
447
+ self->score = &ccsc_score;
448
+
449
+ return self;
450
+ }
451
+
452
+ /***************************************************************************
453
+ * SingleMatchScorer
454
+ ***************************************************************************/
455
+
456
+ #define SMSc(scorer) ((SingleMatchScorer *)(scorer))
457
+
458
+ typedef struct SingleMatchScorer
459
+ {
460
+ Scorer super; /* base scorer; must stay first so the SMSc() cast works */
461
+ Coordinator *coordinator; /* its num_matches is incremented on every score call */
462
+ Scorer *scorer; /* the wrapped scorer all calls are forwarded to */
463
+ } SingleMatchScorer;
464
+
465
+
466
+ static float smsc_score(Scorer *self)
467
+ {
468
+ SMSc(self)->coordinator->num_matches++;
469
+ return SMSc(self)->scorer->score(SMSc(self)->scorer);
470
+ }
471
+
472
+ static bool smsc_next(Scorer *self)
473
+ {
474
+ Scorer *scorer = SMSc(self)->scorer;
475
+ if (scorer->next(scorer)) {
476
+ self->doc = scorer->doc;
477
+ return true;
478
+ }
479
+ return false;
480
+ }
481
+
482
+ static bool smsc_skip_to(Scorer *self, int doc_num)
483
+ {
484
+ Scorer *scorer = SMSc(self)->scorer;
485
+ if (scorer->skip_to(scorer, doc_num)) {
486
+ self->doc = scorer->doc;
487
+ return true;
488
+ }
489
+ return false;
490
+ }
491
+
492
+ static Explanation *smsc_explain(Scorer *self, int doc_num)
493
+ {
494
+ Scorer *scorer = SMSc(self)->scorer;
495
+ return scorer->explain(scorer, doc_num);
496
+ }
497
+
498
+ static void smsc_destroy(Scorer *self)
499
+ {
500
+ Scorer *scorer = SMSc(self)->scorer;
501
+ scorer->destroy(scorer);
502
+ scorer_destroy_i(self);
503
+ }
504
+
505
+ static Scorer *single_match_scorer_new(Coordinator *coordinator,
506
+ Scorer *scorer)
507
+ {
508
+ Scorer *self = scorer_new(SingleMatchScorer, scorer->similarity);
509
+ SMSc(self)->coordinator = coordinator;
510
+ SMSc(self)->scorer = scorer;
511
+
512
+ self->score = &smsc_score;
513
+ self->next = &smsc_next;
514
+ self->skip_to = &smsc_skip_to;
515
+ self->explain = &smsc_explain;
516
+ self->destroy = &smsc_destroy;
517
+ return self;
518
+ }
519
+
520
+ /***************************************************************************
521
+ * ReqOptSumScorer
522
+ ***************************************************************************/
523
+
524
+ #define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
525
+
526
+ typedef struct ReqOptSumScorer
527
+ {
528
+ Scorer super;
529
+ Scorer *req_scorer;
530
+ Scorer *opt_scorer;
531
+ bool first_time_opt;
532
+ } ReqOptSumScorer;
533
+
534
+ static float rossc_score(Scorer *self)
535
+ {
536
+ ReqOptSumScorer *rossc = ROSSc(self);
537
+ Scorer *req_scorer = rossc->req_scorer;
538
+ Scorer *opt_scorer = rossc->opt_scorer;
539
+ int cur_doc = req_scorer->doc;
540
+ float req_score = req_scorer->score(req_scorer);
541
+
542
+ if (rossc->first_time_opt) {
543
+ rossc->first_time_opt = false;
544
+ if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
545
+ SCORER_NULLIFY(rossc->opt_scorer);
546
+ return req_score;
547
+ }
548
+ }
549
+ else if (opt_scorer == NULL) {
550
+ return req_score;
551
+ }
552
+ else if ((opt_scorer->doc < cur_doc)
553
+ && ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
554
+ SCORER_NULLIFY(rossc->opt_scorer);
555
+ return req_score;
556
+ }
557
+ /* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
558
+ return (opt_scorer->doc == cur_doc)
559
+ ? req_score + opt_scorer->score(opt_scorer)
560
+ : req_score;
561
+ }
562
+
563
+ static bool rossc_next(Scorer *self)
564
+ {
565
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
566
+ if (req_scorer->next(req_scorer)) {
567
+ self->doc = req_scorer->doc;
568
+ return true;
569
+ }
570
+ return false;
571
+ }
572
+
573
+ static bool rossc_skip_to(Scorer *self, int doc_num)
574
+ {
575
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
576
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
577
+ self->doc = req_scorer->doc;
578
+ return true;
579
+ }
580
+ return false;
581
+ }
582
+
583
+ static Explanation *rossc_explain(Scorer *self, int doc_num)
584
+ {
585
+ Scorer *req_scorer = ROSSc(self)->req_scorer;
586
+ Scorer *opt_scorer = ROSSc(self)->opt_scorer;
587
+
588
+ Explanation *e = expl_new(self->score(self),"required, optional:");
589
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
590
+ expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
591
+ return e;
592
+ }
593
+
594
+ static void rossc_destroy(Scorer *self)
595
+ {
596
+ ReqOptSumScorer *rossc = ROSSc(self);
597
+ if (rossc->req_scorer) {
598
+ rossc->req_scorer->destroy(rossc->req_scorer);
599
+ }
600
+ if (rossc->opt_scorer) {
601
+ rossc->opt_scorer->destroy(rossc->opt_scorer);
602
+ }
603
+ scorer_destroy_i(self);
604
+ }
605
+
606
+
607
+ static Scorer *req_opt_sum_scorer_new(Scorer *req_scorer, Scorer *opt_scorer)
608
+ {
609
+ Scorer *self = scorer_new(ReqOptSumScorer, NULL);
610
+
611
+ ROSSc(self)->req_scorer = req_scorer;
612
+ ROSSc(self)->opt_scorer = opt_scorer;
613
+ ROSSc(self)->first_time_opt = true;
614
+
615
+ self->score = &rossc_score;
616
+ self->next = &rossc_next;
617
+ self->skip_to = &rossc_skip_to;
618
+ self->explain = &rossc_explain;
619
+ self->destroy = &rossc_destroy;
620
+
621
+ return self;
622
+ }
623
+
624
+ /***************************************************************************
625
+ * ReqExclScorer
626
+ ***************************************************************************/
627
+
628
+ #define RXSc(scorer) ((ReqExclScorer *)(scorer))
629
+ typedef struct ReqExclScorer
630
+ {
631
+ Scorer super;
632
+ Scorer *req_scorer;
633
+ Scorer *excl_scorer;
634
+ bool first_time;
635
+ } ReqExclScorer;
636
+
637
+ static bool rxsc_to_non_excluded(Scorer *self)
638
+ {
639
+ Scorer *req_scorer = RXSc(self)->req_scorer;
640
+ Scorer *excl_scorer = RXSc(self)->excl_scorer;
641
+ int excl_doc = excl_scorer->doc, req_doc;
642
+
643
+ do {
644
+ /* may be excluded */
645
+ req_doc = req_scorer->doc;
646
+ if (req_doc < excl_doc) {
647
+ /* req_scorer advanced to before excl_scorer, ie. not excluded */
648
+ self->doc = req_doc;
649
+ return true;
650
+ }
651
+ else if (req_doc > excl_doc) {
652
+ if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
653
+ /* emptied, no more exclusions */
654
+ SCORER_NULLIFY(RXSc(self)->excl_scorer);
655
+ self->doc = req_doc;
656
+ return true;
657
+ }
658
+ excl_doc = excl_scorer->doc;
659
+ if (excl_doc > req_doc) {
660
+ self->doc = req_doc;
661
+ return true; /* not excluded */
662
+ }
663
+ }
664
+ } while (req_scorer->next(req_scorer));
665
+ /* emptied, nothing left */
666
+ SCORER_NULLIFY(RXSc(self)->req_scorer);
667
+ return false;
668
+ }
669
+
670
+ static bool rxsc_next(Scorer *self)
671
+ {
672
+ ReqExclScorer *rxsc = RXSc(self);
673
+ Scorer *req_scorer = rxsc->req_scorer;
674
+ Scorer *excl_scorer = rxsc->excl_scorer;
675
+
676
+ if (rxsc->first_time) {
677
+ if (! excl_scorer->next(excl_scorer)) {
678
+ /* emptied at start */
679
+ SCORER_NULLIFY(rxsc->excl_scorer);
680
+ excl_scorer = NULL;
681
+ }
682
+ rxsc->first_time = false;
683
+ }
684
+ if (req_scorer == NULL) {
685
+ return false;
686
+ }
687
+ if (! req_scorer->next(req_scorer)) {
688
+ /* emptied, nothing left */
689
+ SCORER_NULLIFY(rxsc->req_scorer);
690
+ return false;
691
+ }
692
+ if (excl_scorer == NULL) {
693
+ self->doc = req_scorer->doc;
694
+ /* req_scorer->next() already returned true */
695
+ return true;
696
+ }
697
+ return rxsc_to_non_excluded(self);
698
+ }
699
+
700
+ static bool rxsc_skip_to(Scorer *self, int doc_num)
701
+ {
702
+ ReqExclScorer *rxsc = RXSc(self);
703
+ Scorer *req_scorer = rxsc->req_scorer;
704
+ Scorer *excl_scorer = rxsc->excl_scorer;
705
+
706
+ if (rxsc->first_time) {
707
+ rxsc->first_time = false;
708
+ if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
709
+ /* emptied */
710
+ SCORER_NULLIFY(rxsc->excl_scorer);
711
+ excl_scorer = NULL;
712
+ }
713
+ }
714
+ if (req_scorer == NULL) {
715
+ return false;
716
+ }
717
+ if (excl_scorer == NULL) {
718
+ if (req_scorer->skip_to(req_scorer, doc_num)) {
719
+ self->doc = req_scorer->doc;
720
+ return true;
721
+ }
722
+ return false;
723
+ }
724
+ if (! req_scorer->skip_to(req_scorer, doc_num)) {
725
+ SCORER_NULLIFY(rxsc->req_scorer);
726
+ return false;
727
+ }
728
+ return rxsc_to_non_excluded(self);
729
+ }
730
+
731
+ static float rxsc_score(Scorer *self)
732
+ {
733
+ Scorer *req_scorer = RXSc(self)->req_scorer;
734
+ return req_scorer->score(req_scorer);
735
+ }
736
+
737
+ static Explanation *rxsc_explain(Scorer *self, int doc_num)
738
+ {
739
+ ReqExclScorer *rxsc = RXSc(self);
740
+ Scorer *req_scorer = rxsc->req_scorer;
741
+ Scorer *excl_scorer = rxsc->excl_scorer;
742
+ Explanation *e;
743
+
744
+ if (excl_scorer->skip_to(excl_scorer, doc_num)
745
+ && excl_scorer->doc == doc_num) {
746
+ e = expl_new(0.0, "excluded:");
747
+ }
748
+ else {
749
+ e = expl_new(0.0, "not excluded:");
750
+ expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
751
+ }
752
+ return e;
753
+ }
754
+
755
+ static void rxsc_destroy(Scorer *self)
756
+ {
757
+ ReqExclScorer *rxsc = RXSc(self);
758
+ if (rxsc->req_scorer) {
759
+ rxsc->req_scorer->destroy(rxsc->req_scorer);
760
+ }
761
+ if (rxsc->excl_scorer) {
762
+ rxsc->excl_scorer->destroy(rxsc->excl_scorer);
763
+ }
764
+ scorer_destroy_i(self);
765
+ }
766
+
767
+ static Scorer *req_excl_scorer_new(Scorer *req_scorer, Scorer *excl_scorer)
768
+ {
769
+ Scorer *self = scorer_new(ReqExclScorer, NULL);
770
+ RXSc(self)->req_scorer = req_scorer;
771
+ RXSc(self)->excl_scorer = excl_scorer;
772
+ RXSc(self)->first_time = true;
773
+
774
+ self->score = &rxsc_score;
775
+ self->next = &rxsc_next;
776
+ self->skip_to = &rxsc_skip_to;
777
+ self->explain = &rxsc_explain;
778
+ self->destroy = &rxsc_destroy;
779
+
780
+ return self;
781
+ }
782
+
783
+ /***************************************************************************
784
+ * NonMatchScorer
785
+ ***************************************************************************/
786
+
787
+ static float nmsc_score(Scorer *self)
788
+ {
789
+ (void)self;
790
+ return 0.0;
791
+ }
792
+
793
+ static bool nmsc_next(Scorer *self)
794
+ {
795
+ (void)self;
796
+ return false;
797
+ }
798
+
799
+ static bool nmsc_skip_to(Scorer *self, int doc_num)
800
+ {
801
+ (void)self; (void)doc_num;
802
+ return false;
803
+ }
804
+
805
+ static Explanation *nmsc_explain(Scorer *self, int doc_num)
806
+ {
807
+ (void)self; (void)doc_num;
808
+ return expl_new(0.0, "No documents matched");
809
+ }
810
+
811
+ static Scorer *non_matching_scorer_new()
812
+ {
813
+ Scorer *self = scorer_new(Scorer, NULL);
814
+ self->score = &nmsc_score;
815
+ self->next = &nmsc_next;
816
+ self->skip_to = &nmsc_skip_to;
817
+ self->explain = &nmsc_explain;
818
+
819
+ return self;
820
+ }
821
+
822
+ /***************************************************************************
823
+ * BooleanScorer
824
+ ***************************************************************************/
825
+
826
+ #define BSc(scorer) ((BooleanScorer *)(scorer))
827
+ typedef struct BooleanScorer
828
+ {
829
+ Scorer super;
830
+ Scorer **required_scorers;
831
+ int rs_cnt;
832
+ int rs_capa;
833
+ Scorer **optional_scorers;
834
+ int os_cnt;
835
+ int os_capa;
836
+ Scorer **prohibited_scorers;
837
+ int ps_cnt;
838
+ int ps_capa;
839
+ Scorer *counting_sum_scorer;
840
+ Coordinator *coordinator;
841
+ } BooleanScorer;
842
+
843
+ static Scorer *counting_sum_scorer_create3(BooleanScorer *bsc,
844
+ Scorer *req_scorer,
845
+ Scorer *opt_scorer)
846
+ {
847
+ if (bsc->ps_cnt == 0) {
848
+ /* no prohibited */
849
+ return req_opt_sum_scorer_new(req_scorer, opt_scorer);
850
+ }
851
+ else if (bsc->ps_cnt == 1) {
852
+ /* 1 prohibited */
853
+ return req_opt_sum_scorer_new(
854
+ req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
855
+ opt_scorer);
856
+ }
857
+ else {
858
+ /* more prohibited */
859
+ return req_opt_sum_scorer_new(
860
+ req_excl_scorer_new(
861
+ req_scorer,
862
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
863
+ bsc->ps_cnt, 1)),
864
+ opt_scorer);
865
+ }
866
+ }
867
+
868
+ static Scorer *counting_sum_scorer_create2(BooleanScorer *bsc,
869
+ Scorer *req_scorer,
870
+ Scorer **optional_scorers,
871
+ int os_cnt)
872
+ {
873
+ if (os_cnt == 0) {
874
+ if (bsc->ps_cnt == 0) {
875
+ return req_scorer;
876
+ }
877
+ else if (bsc->ps_cnt == 1) {
878
+ return req_excl_scorer_new(req_scorer,
879
+ bsc->prohibited_scorers[0]);
880
+ }
881
+ else {
882
+ /* no optional, more than 1 prohibited */
883
+ return req_excl_scorer_new(
884
+ req_scorer,
885
+ disjunction_sum_scorer_new(bsc->prohibited_scorers,
886
+ bsc->ps_cnt, 1));
887
+ }
888
+ }
889
+ else if (os_cnt == 1) {
890
+ return counting_sum_scorer_create3(
891
+ bsc,
892
+ req_scorer,
893
+ single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
894
+ }
895
+ else {
896
+ /* more optional */
897
+ return counting_sum_scorer_create3(
898
+ bsc,
899
+ req_scorer,
900
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
901
+ optional_scorers, os_cnt, 1));
902
+ }
903
+ }
904
+
905
+ static Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
906
+ {
907
+ if (bsc->rs_cnt == 0) {
908
+ if (bsc->os_cnt == 0) {
909
+ int i;
910
+ /* only prohibited scorers so return non_matching scorer */
911
+ for (i = 0; i < bsc->ps_cnt; i++) {
912
+ bsc->prohibited_scorers[i]->destroy(
913
+ bsc->prohibited_scorers[i]);
914
+ }
915
+ return non_matching_scorer_new();
916
+ }
917
+ else if (bsc->os_cnt == 1) {
918
+ /* the only optional scorer is required */
919
+ return counting_sum_scorer_create2(
920
+ bsc,
921
+ single_match_scorer_new(bsc->coordinator,
922
+ bsc->optional_scorers[0]),
923
+ NULL, 0); /* no optional scorers left */
924
+ }
925
+ else {
926
+ /* more than 1 optional_scorers, no required scorers */
927
+ return counting_sum_scorer_create2(
928
+ bsc,
929
+ counting_disjunction_sum_scorer_new(bsc->coordinator,
930
+ bsc->optional_scorers,
931
+ bsc->os_cnt, 1),
932
+ NULL, 0); /* no optional scorers left */
933
+ }
934
+ }
935
+ else if (bsc->rs_cnt == 1) {
936
+ /* 1 required */
937
+ return counting_sum_scorer_create2(
938
+ bsc,
939
+ single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
940
+ bsc->optional_scorers, bsc->os_cnt);
941
+ }
942
+ else {
943
+ /* more required scorers */
944
+ return counting_sum_scorer_create2(
945
+ bsc,
946
+ counting_conjunction_sum_scorer_new(bsc->coordinator,
947
+ bsc->required_scorers,
948
+ bsc->rs_cnt),
949
+ bsc->optional_scorers, bsc->os_cnt);
950
+ }
951
+ }
952
+
953
+ static Scorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
954
+ {
955
+ coord_init(bsc->coordinator);
956
+ return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
957
+ }
958
+
959
+ static void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
960
+ {
961
+ BooleanScorer *bsc = BSc(self);
962
+ if (occur != BC_MUST_NOT) {
963
+ bsc->coordinator->max_coord++;
964
+ }
965
+
966
+ switch (occur) {
967
+ case BC_MUST:
968
+ RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
969
+ bsc->required_scorers[bsc->rs_cnt++] = scorer;
970
+ break;
971
+ case BC_SHOULD:
972
+ RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
973
+ bsc->optional_scorers[bsc->os_cnt++] = scorer;
974
+ break;
975
+ case BC_MUST_NOT:
976
+ RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
977
+ bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
978
+ break;
979
+ default:
980
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
981
+ ":must_not instead");
982
+ }
983
+ }
984
+
985
+ static float bsc_score(Scorer *self)
986
+ {
987
+ BooleanScorer *bsc = BSc(self);
988
+ Coordinator *coord = bsc->coordinator;
989
+ float sum;
990
+ coord->num_matches = 0;
991
+ sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
992
+ return sum * coord->coord_factors[coord->num_matches];
993
+ }
994
+
995
+ static bool bsc_next(Scorer *self)
996
+ {
997
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
998
+
999
+ if (!cnt_sum_sc) {
1000
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1001
+ }
1002
+ if (cnt_sum_sc->next(cnt_sum_sc)) {
1003
+ self->doc = cnt_sum_sc->doc;
1004
+ return true;
1005
+ }
1006
+ else {
1007
+ return false;
1008
+ }
1009
+ }
1010
+
1011
+ static bool bsc_skip_to(Scorer *self, int doc_num)
1012
+ {
1013
+ Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
1014
+
1015
+ if (!BSc(self)->counting_sum_scorer) {
1016
+ cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
1017
+ }
1018
+ if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
1019
+ self->doc = cnt_sum_sc->doc;
1020
+ return true;
1021
+ }
1022
+ else {
1023
+ return false;
1024
+ }
1025
+ }
1026
+
1027
+ static void bsc_destroy(Scorer *self)
1028
+ {
1029
+ BooleanScorer *bsc = BSc(self);
1030
+ Coordinator *coord = bsc->coordinator;
1031
+
1032
+ free(coord->coord_factors);
1033
+ free(coord);
1034
+
1035
+ if (bsc->counting_sum_scorer) {
1036
+ bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
1037
+ }
1038
+ else {
1039
+ int i;
1040
+ for (i = 0; i < bsc->rs_cnt; i++) {
1041
+ bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
1042
+ }
1043
+
1044
+ for (i = 0; i < bsc->os_cnt; i++) {
1045
+ bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
1046
+ }
1047
+
1048
+ for (i = 0; i < bsc->ps_cnt; i++) {
1049
+ bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
1050
+ }
1051
+ }
1052
+ free(bsc->required_scorers);
1053
+ free(bsc->optional_scorers);
1054
+ free(bsc->prohibited_scorers);
1055
+ scorer_destroy_i(self);
1056
+ }
1057
+
1058
+ static Explanation *bsc_explain(Scorer *self, int doc_num)
1059
+ {
1060
+ (void)self; (void)doc_num;
1061
+ return expl_new(0.0, "This explanation is not supported");
1062
+ }
1063
+
1064
+ static Scorer *bsc_new(Similarity *similarity)
1065
+ {
1066
+ Scorer *self = scorer_new(BooleanScorer, similarity);
1067
+ BSc(self)->coordinator = coord_new(similarity);
1068
+ BSc(self)->counting_sum_scorer = NULL;
1069
+
1070
+ self->score = &bsc_score;
1071
+ self->next = &bsc_next;
1072
+ self->skip_to = &bsc_skip_to;
1073
+ self->explain = &bsc_explain;
1074
+ self->destroy = &bsc_destroy;
1075
+ return self;
1076
+ }
1077
+
1078
+ /***************************************************************************
1079
+ *
1080
+ * BooleanWeight
1081
+ *
1082
+ ***************************************************************************/
1083
+
1084
+ typedef struct BooleanWeight
1085
+ {
1086
+ Weight w;
1087
+ Weight **weights;
1088
+ int w_cnt;
1089
+ } BooleanWeight;
1090
+
1091
+
1092
+ static float bw_sum_of_squared_weights(Weight *self)
1093
+ {
1094
+ BooleanQuery *bq = BQ(self->query);
1095
+ float sum = 0.0;
1096
+ int i;
1097
+
1098
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1099
+ if (! bq->clauses[i]->is_prohibited) {
1100
+ Weight *weight = BW(self)->weights[i];
1101
+ /* sum sub-weights */
1102
+ sum += weight->sum_of_squared_weights(weight);
1103
+ }
1104
+ }
1105
+
1106
+ /* boost each sub-weight */
1107
+ sum *= self->value * self->value;
1108
+ return sum;
1109
+ }
1110
+
1111
+ static void bw_normalize(Weight *self, float normalization_factor)
1112
+ {
1113
+ BooleanQuery *bq = BQ(self->query);
1114
+ int i;
1115
+
1116
+ normalization_factor *= self->value; /* multiply by query boost */
1117
+
1118
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1119
+ if (! bq->clauses[i]->is_prohibited) {
1120
+ Weight *weight = BW(self)->weights[i];
1121
+ /* sum sub-weights */
1122
+ weight->normalize(weight, normalization_factor);
1123
+ }
1124
+ }
1125
+ }
1126
+
1127
+ static Scorer *bw_scorer(Weight *self, IndexReader *ir)
1128
+ {
1129
+ Scorer *bsc = bsc_new(self->similarity);
1130
+ BooleanQuery *bq = BQ(self->query);
1131
+ int i;
1132
+
1133
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1134
+ BooleanClause *clause = bq->clauses[i];
1135
+ Weight *weight = BW(self)->weights[i];
1136
+ Scorer *sub_scorer = weight->scorer(weight, ir);
1137
+ if (sub_scorer) {
1138
+ bsc_add_scorer(bsc, sub_scorer, clause->occur);
1139
+ }
1140
+ else if (clause->is_required) {
1141
+ bsc->destroy(bsc);
1142
+ return NULL;
1143
+ }
1144
+ }
1145
+
1146
+ return bsc;
1147
+ }
1148
+
1149
+ static char *bw_to_s(Weight *self)
1150
+ {
1151
+ return strfmt("BooleanWeight(%f)", self->value);
1152
+ }
1153
+
1154
+ static void bw_destroy(Weight *self)
1155
+ {
1156
+ int i;
1157
+
1158
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1159
+ BW(self)->weights[i]->destroy(BW(self)->weights[i]);
1160
+ }
1161
+
1162
+ free(BW(self)->weights);
1163
+ w_destroy(self);
1164
+ }
1165
+
1166
+ static Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
1167
+ {
1168
+ BooleanQuery *bq = BQ(self->query);
1169
+ Explanation *sum_expl = expl_new(0.0, "sum of:");
1170
+ Explanation *explanation;
1171
+ int coord = 0;
1172
+ int max_coord = 0;
1173
+ float coord_factor = 0.0;
1174
+ float sum = 0.0;
1175
+ int i;
1176
+
1177
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1178
+ Weight *weight = BW(self)->weights[i];
1179
+ BooleanClause *clause = bq->clauses[i];
1180
+ explanation = weight->explain(weight, ir, doc_num);
1181
+ if (!clause->is_prohibited) {
1182
+ max_coord++;
1183
+ }
1184
+ if (explanation->value > 0.0) {
1185
+ if (!clause->is_prohibited) {
1186
+ expl_add_detail(sum_expl, explanation);
1187
+ sum += explanation->value;
1188
+ coord++;
1189
+ }
1190
+ else {
1191
+ expl_destroy(explanation);
1192
+ expl_destroy(sum_expl);
1193
+ return expl_new(0.0, "match prohibited");
1194
+ }
1195
+ }
1196
+ else if (clause->is_required) {
1197
+ expl_destroy(explanation);
1198
+ expl_destroy(sum_expl);
1199
+ return expl_new(0.0, "match required");
1200
+ }
1201
+ else {
1202
+ expl_destroy(explanation);
1203
+ }
1204
+ }
1205
+ sum_expl->value = sum;
1206
+
1207
+ if (coord == 1) { /* only one clause matched */
1208
+ explanation = sum_expl; /* eliminate wrapper */
1209
+ ary_size(sum_expl->details) = 0;
1210
+ sum_expl = sum_expl->details[0];
1211
+ expl_destroy(explanation);
1212
+ }
1213
+
1214
+ coord_factor = sim_coord(self->similarity, coord, max_coord);
1215
+
1216
+ if (coord_factor == 1.0) { /* coord is no-op */
1217
+ return sum_expl; /* eliminate wrapper */
1218
+ }
1219
+ else {
1220
+ explanation = expl_new(sum * coord_factor, "product of:");
1221
+ expl_add_detail(explanation, sum_expl);
1222
+ expl_add_detail(explanation, expl_new(coord_factor, "coord(%d/%d)",
1223
+ coord, max_coord));
1224
+ return explanation;
1225
+ }
1226
+ }
1227
+
1228
+ static Weight *bw_new(Query *query, Searcher *searcher)
1229
+ {
1230
+ int i;
1231
+ Weight *self = w_new(BooleanWeight, query);
1232
+
1233
+ BW(self)->w_cnt = BQ(query)->clause_cnt;
1234
+ BW(self)->weights = ALLOC_N(Weight *, BW(self)->w_cnt);
1235
+ for (i = 0; i < BW(self)->w_cnt; i++) {
1236
+ BW(self)->weights[i] = q_weight(BQ(query)->clauses[i]->query, searcher);
1237
+ }
1238
+
1239
+ self->normalize = &bw_normalize;
1240
+ self->scorer = &bw_scorer;
1241
+ self->explain = &bw_explain;
1242
+ self->to_s = &bw_to_s;
1243
+ self->destroy = &bw_destroy;
1244
+ self->sum_of_squared_weights = &bw_sum_of_squared_weights;
1245
+
1246
+ self->similarity = query->get_similarity(query, searcher);
1247
+ self->value = query->boost;
1248
+
1249
+ return self;
1250
+ }
1251
+
1252
+ /***************************************************************************
1253
+ *
1254
+ * BooleanClause
1255
+ *
1256
+ ***************************************************************************/
1257
+
1258
+ void bc_set_occur(BooleanClause *self, enum BC_TYPE occur)
1259
+ {
1260
+ self->occur = occur;
1261
+ switch (occur) {
1262
+ case BC_SHOULD:
1263
+ self->is_prohibited = false;
1264
+ self->is_required = false;
1265
+ break;
1266
+ case BC_MUST:
1267
+ self->is_prohibited = false;
1268
+ self->is_required = true;
1269
+ break;
1270
+ case BC_MUST_NOT:
1271
+ self->is_prohibited = true;
1272
+ self->is_required = false;
1273
+ break;
1274
+ default:
1275
+ RAISE(ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
1276
+ ":must or :must_not instead");
1277
+ }
1278
+ }
1279
+
1280
+ void bc_deref(BooleanClause *self)
1281
+ {
1282
+ if (--self->ref_cnt <= 0) {
1283
+ q_deref(self->query);
1284
+ free(self);
1285
+ }
1286
+ }
1287
+
1288
+ static unsigned long bc_hash(BooleanClause *self)
1289
+ {
1290
+ return ((q_hash(self->query) << 2) | self->occur);
1291
+ }
1292
+
1293
+ static int bc_eq(BooleanClause *self, BooleanClause *o)
1294
+ {
1295
+ return ((self->occur == o->occur) && q_eq(self->query, o->query));
1296
+ }
1297
+
1298
+ BooleanClause *bc_new(Query *query, enum BC_TYPE occur)
1299
+ {
1300
+ BooleanClause *self = ALLOC(BooleanClause);
1301
+ self->ref_cnt = 1;
1302
+ self->query = query;
1303
+ bc_set_occur(self, occur);
1304
+ return self;
1305
+ }
1306
+
1307
+ /***************************************************************************
1308
+ *
1309
+ * BooleanQuery
1310
+ *
1311
+ ***************************************************************************/
1312
+
1313
+ static MatchVector *bq_get_matchv_i(Query *self, MatchVector *mv,
1314
+ TermVector *tv)
1315
+ {
1316
+ int i;
1317
+ for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
1318
+ if (BQ(self)->clauses[i]->occur != BC_MUST_NOT) {
1319
+ Query *q = BQ(self)->clauses[i]->query;
1320
+ q->get_matchv_i(q, mv, tv);
1321
+ }
1322
+ }
1323
+ return mv;
1324
+ }
1325
+
1326
+ static Query *bq_rewrite(Query *self, IndexReader *ir)
1327
+ {
1328
+ int i;
1329
+ const int clause_cnt = BQ(self)->clause_cnt;
1330
+ bool rewritten = false;
1331
+ bool has_non_prohibited_clause = false;
1332
+
1333
+ if (clause_cnt == 1) {
1334
+ /* optimize 1-clause queries */
1335
+ BooleanClause *clause = BQ(self)->clauses[0];
1336
+ if (! clause->is_prohibited) {
1337
+ /* just return clause. Re-write first. */
1338
+ Query *q = clause->query->rewrite(clause->query, ir);
1339
+
1340
+ if (self->boost != 1.0) {
1341
+ /* original_boost is initialized to 0.0. If it has been set to
1342
+ * something else it means this query has already been boosted
1343
+ * before so boost from the original value */
1344
+ if ((q == clause->query) && BQ(self)->original_boost) {
1345
+ /* rewrite was no-op */
1346
+ q->boost = BQ(self)->original_boost * self->boost;
1347
+ }
1348
+ else {
1349
+ /* save original boost in case query is rewritten again */
1350
+ BQ(self)->original_boost = q->boost;
1351
+ q->boost *= self->boost;
1352
+ }
1353
+ }
1354
+
1355
+ return q;
1356
+ }
1357
+ }
1358
+
1359
+ self->ref_cnt++;
1360
+ /* replace each clause's query with its rewritten query */
1361
+ for (i = 0; i < clause_cnt; i++) {
1362
+ BooleanClause *clause = BQ(self)->clauses[i];
1363
+ Query *rq = clause->query->rewrite(clause->query, ir);
1364
+ /* check for at least one non-prohibited clause */
1365
+ if (clause->is_prohibited == false) has_non_prohibited_clause = true;
1366
+ if (rq != clause->query) {
1367
+ if (!rewritten) {
1368
+ int j;
1369
+ Query *new_self = q_new(BooleanQuery);
1370
+ memcpy(new_self, self, sizeof(BooleanQuery));
1371
+ BQ(new_self)->clauses = ALLOC_N(BooleanClause *,
1372
+ BQ(self)->clause_capa);
1373
+ memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
1374
+ BQ(self)->clause_capa * sizeof(BooleanClause *));
1375
+ for (j = 0; j < clause_cnt; j++) {
1376
+ REF(BQ(self)->clauses[j]);
1377
+ }
1378
+ self->ref_cnt--;
1379
+ self = new_self;
1380
+ self->ref_cnt = 1;
1381
+ rewritten = true;
1382
+ }
1383
+ DEREF(clause);
1384
+ BQ(self)->clauses[i] = bc_new(rq, clause->occur);
1385
+ } else {
1386
+ DEREF(rq);
1387
+ }
1388
+ }
1389
+ if (clause_cnt > 0 && !has_non_prohibited_clause) {
1390
+ bq_add_query_nr(self, maq_new(), BC_MUST);
1391
+ }
1392
+
1393
+ return self;
1394
+ }
1395
+
1396
+ static void bq_extract_terms(Query *self, HashSet *terms)
1397
+ {
1398
+ int i;
1399
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1400
+ BooleanClause *clause = BQ(self)->clauses[i];
1401
+ clause->query->extract_terms(clause->query, terms);
1402
+ }
1403
+ }
1404
+
1405
+ static char *bq_to_s(Query *self, const char *field)
1406
+ {
1407
+ int i;
1408
+ BooleanClause *clause;
1409
+ Query *sub_query;
1410
+ char *buffer;
1411
+ char *clause_str;
1412
+ int bp = 0;
1413
+ int size = QUERY_STRING_START_SIZE;
1414
+ int needed;
1415
+ int clause_len;
1416
+
1417
+ buffer = ALLOC_N(char, size);
1418
+ if (self->boost != 1.0) {
1419
+ buffer[0] = '(';
1420
+ bp++;
1421
+ }
1422
+
1423
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1424
+ clause = BQ(self)->clauses[i];
1425
+ clause_str = clause->query->to_s(clause->query, field);
1426
+ clause_len = (int)strlen(clause_str);
1427
+ needed = clause_len + 5;
1428
+ while ((size - bp) < needed) {
1429
+ size *= 2;
1430
+ REALLOC_N(buffer, char, size);
1431
+ }
1432
+
1433
+ if (i > 0) {
1434
+ buffer[bp++] = ' ';
1435
+ }
1436
+ if (clause->is_prohibited) {
1437
+ buffer[bp++] = '-';
1438
+ }
1439
+ else if (clause->is_required) {
1440
+ buffer[bp++] = '+';
1441
+ }
1442
+
1443
+ sub_query = clause->query;
1444
+ if (sub_query->type == BOOLEAN_QUERY) {
1445
+ /* wrap sub-bools in parens */
1446
+ buffer[bp++] = '(';
1447
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1448
+ bp += clause_len;
1449
+ buffer[bp++] = ')';
1450
+ }
1451
+ else {
1452
+ memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
1453
+ bp += clause_len;
1454
+ }
1455
+ free(clause_str);
1456
+ }
1457
+
1458
+ if (self->boost != 1.0) {
1459
+ char *boost_str = strfmt(")^%f", self->boost);
1460
+ int boost_len = (int)strlen(boost_str);
1461
+ REALLOC_N(buffer, char, bp + boost_len + 1);
1462
+ memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
1463
+ bp += boost_len;
1464
+ free(boost_str);
1465
+ }
1466
+ buffer[bp] = 0;
1467
+ return buffer;
1468
+ }
1469
+
1470
+ static void bq_destroy(Query *self)
1471
+ {
1472
+ int i;
1473
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1474
+ bc_deref(BQ(self)->clauses[i]);
1475
+ }
1476
+ free(BQ(self)->clauses);
1477
+ if (BQ(self)->similarity) {
1478
+ BQ(self)->similarity->destroy(BQ(self)->similarity);
1479
+ }
1480
+ q_destroy_i(self);
1481
+ }
1482
+
1483
+ static float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
1484
+ {
1485
+ (void)sim; (void)overlap; (void)max_overlap;
1486
+ return 1.0;
1487
+ }
1488
+
1489
+ static Similarity *bq_get_similarity(Query *self, Searcher *searcher)
1490
+ {
1491
+ if (!BQ(self)->similarity) {
1492
+ Similarity *sim = q_get_similarity_i(self, searcher);
1493
+ BQ(self)->similarity = ALLOC(Similarity);
1494
+ memcpy(BQ(self)->similarity, sim, sizeof(Similarity));
1495
+ BQ(self)->similarity->coord = &bq_coord_disabled;
1496
+ BQ(self)->similarity->destroy = (void (*)(Similarity *))&free;
1497
+ }
1498
+
1499
+ return BQ(self)->similarity;
1500
+ }
1501
+
1502
+ static unsigned long bq_hash(Query *self)
1503
+ {
1504
+ int i;
1505
+ unsigned long hash = 0;
1506
+ for (i = 0; i < BQ(self)->clause_cnt; i++) {
1507
+ hash ^= bc_hash(BQ(self)->clauses[i]);
1508
+ }
1509
+ return (hash << 1) | BQ(self)->coord_disabled;
1510
+ }
1511
+
1512
+ static int bq_eq(Query *self, Query *o)
1513
+ {
1514
+ int i;
1515
+ BooleanQuery *bq1 = BQ(self);
1516
+ BooleanQuery *bq2 = BQ(o);
1517
+ if ((bq1->coord_disabled != bq2->coord_disabled)
1518
+ || (bq1->max_clause_cnt != bq1->max_clause_cnt)
1519
+ || (bq1->clause_cnt != bq2->clause_cnt)) {
1520
+ return false;
1521
+ }
1522
+
1523
+ for (i = 0; i < bq1->clause_cnt; i++) {
1524
+ if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
1525
+ return false;
1526
+ }
1527
+ }
1528
+ return true;
1529
+ }
1530
+
1531
+ Query *bq_new(bool coord_disabled)
1532
+ {
1533
+ Query *self = q_new(BooleanQuery);
1534
+ BQ(self)->coord_disabled = coord_disabled;
1535
+ if (coord_disabled) {
1536
+ self->get_similarity = &bq_get_similarity;
1537
+ }
1538
+ BQ(self)->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
1539
+ BQ(self)->clause_cnt = 0;
1540
+ BQ(self)->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
1541
+ BQ(self)->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
1542
+ BQ(self)->similarity = NULL;
1543
+ BQ(self)->original_boost = 0.0;
1544
+
1545
+ self->type = BOOLEAN_QUERY;
1546
+ self->rewrite = &bq_rewrite;
1547
+ self->extract_terms = &bq_extract_terms;
1548
+ self->to_s = &bq_to_s;
1549
+ self->hash = &bq_hash;
1550
+ self->eq = &bq_eq;
1551
+ self->destroy_i = &bq_destroy;
1552
+ self->create_weight_i = &bw_new;
1553
+ self->get_matchv_i = &bq_get_matchv_i;
1554
+
1555
+ return self;
1556
+ }
1557
+
1558
+ Query *bq_new_max(bool coord_disabled, int max)
1559
+ {
1560
+ Query *q = bq_new(coord_disabled);
1561
+ BQ(q)->max_clause_cnt = max;
1562
+ return q;
1563
+ }
1564
+
1565
+ BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
1566
+ {
1567
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1568
+ RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1569
+ "<%d> but your query has <%d> clauses. You can try increasing "
1570
+ ":max_clause_count for the BooleanQuery or using a different "
1571
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1572
+ }
1573
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1574
+ BQ(self)->clause_capa *= 2;
1575
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1576
+ }
1577
+ BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1578
+ BQ(self)->clause_cnt++;
1579
+ return bc;
1580
+ }
1581
+
1582
+ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
1583
+ {
1584
+ REF(bc);
1585
+ return bq_add_clause_nr(self, bc);
1586
+ }
1587
+
1588
+ BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur)
1589
+ {
1590
+ BooleanClause *bc;
1591
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1592
+ RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1593
+ "<%d> but your query has <%d> clauses. You can try increasing "
1594
+ ":max_clause_count for the BooleanQuery or using a different "
1595
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1596
+ }
1597
+ bc = bc_new(sub_query, occur);
1598
+ bq_add_clause(self, bc);
1599
+ bc_deref(bc); /* bc was referenced unnecessarily */
1600
+ return bc;
1601
+ }
1602
+
1603
+ BooleanClause *bq_add_query(Query *self, Query *sub_query, enum BC_TYPE occur)
1604
+ {
1605
+ REF(sub_query);
1606
+ return bq_add_query_nr(self, sub_query, occur);
1607
+ }
1608
+