jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/q_span.c ADDED
@@ -0,0 +1,2390 @@
1
+ #include <string.h>
2
+ #include <limits.h>
3
+ #include "search.h"
4
+ #include "hashset.h"
5
+ #include "symbol.h"
6
+ #include "internal.h"
7
+
8
+ #define CLAUSE_INIT_CAPA 4
9
+
10
+ /*****************************************************************************
11
+ *
12
+ * SpanQuery
13
+ *
14
+ *****************************************************************************/
15
+
16
+ /***************************************************************************
17
+ * SpanQuery
18
+ ***************************************************************************/
19
+
20
+ #define SpQ(query) ((SpanQuery *)(query))
21
+
22
+ static unsigned long spanq_hash(Query *self)
23
+ {
24
+ return SpQ(self)->field ? sym_hash(SpQ(self)->field) : 0;
25
+ }
26
+
27
+ static int spanq_eq(Query *self, Query *o)
28
+ {
29
+ return SpQ(self)->field == SpQ(o)->field;
30
+ }
31
+
32
+ static void spanq_destroy_i(Query *self)
33
+ {
34
+ q_destroy_i(self);
35
+ }
36
+
37
+ static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
38
+ HashSet *terms, TermVector *tv)
39
+ {
40
+ HashSetEntry *hse;
41
+ for (hse = terms->first; hse; hse = hse->next) {
42
+ char *term = (char *)hse->elem;
43
+ TVTerm *tv_term = tv_get_tv_term(tv, term);
44
+ if (tv_term) {
45
+ int i, m_idx = 0;
46
+ for (i = 0; i < tv_term->freq; i++) {
47
+ int pos = tv_term->positions[i];
48
+ for (; m_idx < full_mv->size; m_idx++) {
49
+ if (pos <= full_mv->matches[m_idx].end) {
50
+ if (pos >= full_mv->matches[m_idx].start) {
51
+ matchv_add(term_mv, pos, pos);
52
+ }
53
+ break;
54
+ }
55
+ }
56
+ }
57
+ }
58
+ }
59
+
60
+ return term_mv;
61
+ }
62
+
63
+ /***************************************************************************
64
+ * TVTermDocEnum
65
+ * dummy TermDocEnum used by the highlighter to find matches
66
+ ***************************************************************************/
67
+
68
+ #define TV_TDE(tde) ((TVTermDocEnum *)(tde))
69
+
70
+ typedef struct TVTermDocEnum /* TermDocEnum that replays one TermVector as a single document */
70
+ {
71
+ TermDocEnum super; /* base enumerator; must stay first so the struct can be cast to TermDocEnum */
72
+ int doc; /* -1 after a successful seek, 0 once positioned on the document, INT_MAX when the term is absent or the enum is exhausted */
73
+ int index; /* index into positions of the next position to hand out */
74
+ int freq; /* frequency copied from the TVTerm found by the last successful seek */
75
+ int *positions; /* positions array borrowed from that TVTerm (pointer copy, not owned here) */
76
+ TermVector *tv; /* term vector that seek looks terms up in */
77
+ } TVTermDocEnum;
79
+
80
+ static void tv_tde_seek(TermDocEnum *tde, int field_num, const char *term)
81
+ {
82
+ TVTermDocEnum *tv_tde = TV_TDE(tde);
83
+ TVTerm *tv_term = tv_get_tv_term(tv_tde->tv, term);
84
+ (void)field_num;
85
+ if (tv_term) {
86
+ tv_tde->doc = -1;
87
+ tv_tde->index = 0;
88
+ tv_tde->freq = tv_term->freq;
89
+ tv_tde->positions = tv_term->positions;
90
+ }
91
+ else {
92
+ tv_tde->doc = INT_MAX;
93
+ }
94
+ }
95
+
96
+ static bool tv_tde_next(TermDocEnum *tde)
97
+ {
98
+ if (TV_TDE(tde)->doc == -1) {
99
+ TV_TDE(tde)->doc = 0;
100
+ return true;
101
+ }
102
+ else {
103
+ TV_TDE(tde)->doc = INT_MAX;
104
+ return false;
105
+ }
106
+ }
107
+
108
+ static bool tv_tde_skip_to(TermDocEnum *tde, int doc_num)
109
+ {
110
+ if (doc_num == 0) {
111
+ TV_TDE(tde)->doc = 0;
112
+ return true;
113
+ }
114
+ else {
115
+ TV_TDE(tde)->doc = INT_MAX;
116
+ return false;
117
+ }
118
+ }
119
+
120
+ static int tv_tde_next_position(TermDocEnum *tde)
121
+ {
122
+ return TV_TDE(tde)->positions[TV_TDE(tde)->index++];
123
+ }
124
+
125
+ static int tv_tde_freq(TermDocEnum *tde)
126
+ {
127
+ return TV_TDE(tde)->freq;
128
+ }
129
+
130
+ static int tv_tde_doc_num(TermDocEnum *tde)
131
+ {
132
+ return TV_TDE(tde)->doc;
133
+ }
134
+
135
+ static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
136
+ {
137
+ TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
138
+ TermDocEnum *tde = (TermDocEnum *)tv_tde;
139
+ tv_tde->tv = (TermVector *)ir->store;
140
+ tde->seek = &tv_tde_seek;
141
+ tde->doc_num = &tv_tde_doc_num;
142
+ tde->freq = &tv_tde_freq;
143
+ tde->next = &tv_tde_next;
144
+ tde->skip_to = &tv_tde_skip_to;
145
+ tde->next_position = &tv_tde_next_position;
146
+ tde->close = (void (*)(TermDocEnum *tde))&free;
147
+
148
+ return tde;
149
+ }
150
+
151
+ static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
152
+ TermVector *tv)
153
+ {
154
+ if (SpQ(self)->field == tv->field) {
155
+ SpanEnum *sp_enum;
156
+ IndexReader *ir = ALLOC(IndexReader);
157
+ MatchVector *full_mv = matchv_new();
158
+ HashSet *terms = SpQ(self)->get_terms(self);
159
+ /* FIXME What is going on here? Need to document this! */
160
+ ir->fis = fis_new(STORE_NO, INDEX_NO, TERM_VECTOR_NO);
161
+ fis_add_field(ir->fis,
162
+ fi_new(tv->field, STORE_NO, INDEX_NO, TERM_VECTOR_NO));
163
+ ir->store = (Store *)tv;
164
+ ir->term_positions = &spanq_ir_term_positions;
165
+ sp_enum = SpQ(self)->get_spans(self, ir);
166
+ while (sp_enum->next(sp_enum)) {
167
+ matchv_add(full_mv,
168
+ sp_enum->start(sp_enum),
169
+ sp_enum->end(sp_enum) - 1);
170
+ }
171
+ sp_enum->destroy(sp_enum);
172
+
173
+ fis_deref(ir->fis);
174
+ free(ir);
175
+
176
+ matchv_compact(full_mv);
177
+ mv_to_term_mv(mv, full_mv, terms, tv);
178
+ matchv_destroy(full_mv);
179
+ hs_destroy(terms);
180
+ }
181
+ return mv;
182
+ }
183
+
184
+ /***************************************************************************
185
+ *
186
+ * SpanScorer
187
+ *
188
+ ***************************************************************************/
189
+
190
+ #define SpSc(scorer) ((SpanScorer *)(scorer))
191
+ typedef struct SpanScorer /* Scorer that scores documents from the spans of a span query */
192
+ {
193
+ Scorer super; /* base scorer; must stay first so the struct can be cast to Scorer */
194
+ IndexReader *ir; /* NOTE(review): not assigned by spansc_new in the visible code - confirm whether it is used */
195
+ SpanEnum *spans; /* span enumerator obtained from the query; destroyed by spansc_destroy */
196
+ Similarity *sim; /* similarity taken from the weight */
197
+ uchar *norms; /* norms for the query's field, indexed by document number */
198
+ Weight *weight; /* weight this scorer was created from */
199
+ float value; /* copy of weight->value, multiplied into every score */
200
+ float freq; /* sloppy frequency accumulated over the spans of the current document */
201
+ bool first_time : 1; /* true until the span enumerator has been advanced for the first time */
202
+ bool more : 1; /* result of the most recent advance of the span enumerator */
203
+ } SpanScorer;
204
+
205
+ static float spansc_score(Scorer *self)
206
+ {
207
+ SpanScorer *spansc = SpSc(self);
208
+ float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
209
+
210
+ /* normalize */
211
+ return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
212
+ }
213
+
214
+ static bool spansc_next(Scorer *self)
215
+ {
216
+ SpanScorer *spansc = SpSc(self);
217
+ SpanEnum *se = spansc->spans;
218
+ int match_length;
219
+
220
+ if (spansc->first_time) {
221
+ spansc->more = se->next(se);
222
+ spansc->first_time = false;
223
+ }
224
+
225
+ if (!spansc->more) {
226
+ return false;
227
+ }
228
+
229
+ spansc->freq = 0.0;
230
+ self->doc = se->doc(se);
231
+
232
+ do {
233
+ match_length = se->end(se) - se->start(se);
234
+ spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
235
+ spansc->more = se->next(se);
236
+ } while (spansc->more && (self->doc == se->doc(se)));
237
+
238
+ return (spansc->more || (spansc->freq != 0.0));
239
+ }
240
+
241
+ static bool spansc_skip_to(Scorer *self, int target)
242
+ {
243
+ SpanScorer *spansc = SpSc(self);
244
+ SpanEnum *se = spansc->spans;
245
+
246
+ spansc->more = se->skip_to(se, target);
247
+ if (!spansc->more) {
248
+ return false;
249
+ }
250
+
251
+ spansc->freq = 0.0;
252
+ self->doc = se->doc(se);
253
+
254
+ while (spansc->more && (se->doc(se) == target)) {
255
+ spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
256
+ spansc->more = se->next(se);
257
+ if (spansc->first_time) {
258
+ spansc->first_time = false;
259
+ }
260
+ }
261
+
262
+ return (spansc->more || (spansc->freq != 0.0));
263
+ }
264
+
265
+ static Explanation *spansc_explain(Scorer *self, int target)
266
+ {
267
+ Explanation *tf_explanation;
268
+ SpanScorer *spansc = SpSc(self);
269
+ float phrase_freq;
270
+ self->skip_to(self, target);
271
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
272
+
273
+ tf_explanation = expl_new(sim_tf(self->similarity, phrase_freq),
274
+ "tf(phrase_freq(%f)", phrase_freq);
275
+
276
+ return tf_explanation;
277
+ }
278
+
279
+ static void spansc_destroy(Scorer *self)
280
+ {
281
+ SpanScorer *spansc = SpSc(self);
282
+ if (spansc->spans) {
283
+ spansc->spans->destroy(spansc->spans);
284
+ }
285
+ scorer_destroy_i(self);
286
+ }
287
+
288
+ static Scorer *spansc_new(Weight *weight, IndexReader *ir)
289
+ {
290
+ Scorer *self = NULL;
291
+ const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
292
+ if (field_num >= 0) {
293
+ Query *spanq = weight->query;
294
+ self = scorer_new(SpanScorer, weight->similarity);
295
+
296
+ SpSc(self)->first_time = true;
297
+ SpSc(self)->more = true;
298
+ SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
299
+ SpSc(self)->sim = weight->similarity;
300
+ SpSc(self)->norms = ir->get_norms(ir, field_num);
301
+ SpSc(self)->weight = weight;
302
+ SpSc(self)->value = weight->value;
303
+ SpSc(self)->freq = 0.0;
304
+
305
+ self->score = &spansc_score;
306
+ self->next = &spansc_next;
307
+ self->skip_to = &spansc_skip_to;
308
+ self->explain = &spansc_explain;
309
+ self->destroy = &spansc_destroy;
310
+ }
311
+ return self;
312
+ }
313
+
314
+ /*****************************************************************************
315
+ * SpanTermEnum
316
+ *****************************************************************************/
317
+
318
+ #define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
319
+ #define SpTQ(query) ((SpanTermQuery *)(query))
320
+
321
+ typedef struct SpanTermEnum /* SpanEnum over the positions of a single term */
322
+ {
323
+ SpanEnum super; /* base enumerator; must stay first so the struct can be cast to SpanEnum */
324
+ TermDocEnum *positions; /* term position enumerator; closed by spante_destroy */
325
+ int position; /* position of the current span, -1 before the first advance */
326
+ int doc; /* current document, -1 before the first advance, INT_MAX when exhausted */
327
+ int count; /* number of positions already read in the current document */
328
+ int freq; /* frequency of the term in the current document */
329
+ } SpanTermEnum;
330
+
331
+
332
+ static bool spante_next(SpanEnum *self)
333
+ {
334
+ SpanTermEnum *ste = SpTEn(self);
335
+ TermDocEnum *tde = ste->positions;
336
+
337
+ if (ste->count == ste->freq) {
338
+ if (! tde->next(tde)) {
339
+ ste->doc = INT_MAX;
340
+ return false;
341
+ }
342
+ ste->doc = tde->doc_num(tde);
343
+ ste->freq = tde->freq(tde);
344
+ ste->count = 0;
345
+ }
346
+ ste->position = tde->next_position(tde);
347
+ ste->count++;
348
+ return true;
349
+ }
350
+
351
+ static bool spante_skip_to(SpanEnum *self, int target)
352
+ {
353
+ SpanTermEnum *ste = SpTEn(self);
354
+ TermDocEnum *tde = ste->positions;
355
+
356
+ /* are we already at the correct position? */
357
+ /* FIXME: perhaps this the the better solution but currently it ->skip_to
358
+ * does a ->next not matter what
359
+ if (ste->doc >= target) {
360
+ return true;
361
+ }
362
+ */
363
+
364
+ if (! tde->skip_to(tde, target)) {
365
+ ste->doc = INT_MAX;
366
+ return false;
367
+ }
368
+
369
+ ste->doc = tde->doc_num(tde);
370
+ ste->freq = tde->freq(tde);
371
+ ste->count = 0;
372
+
373
+ ste->position = tde->next_position(tde);
374
+ ste->count++;
375
+ return true;
376
+ }
377
+
378
+ static int spante_doc(SpanEnum *self)
379
+ {
380
+ return SpTEn(self)->doc;
381
+ }
382
+
383
+ static int spante_start(SpanEnum *self)
384
+ {
385
+ return SpTEn(self)->position;
386
+ }
387
+
388
+ static int spante_end(SpanEnum *self)
389
+ {
390
+ return SpTEn(self)->position + 1;
391
+ }
392
+
393
+ static char *spante_to_s(SpanEnum *self)
394
+ {
395
+ char *query_str = self->query->to_s(self->query, NULL);
396
+ char pos_str[20];
397
+ size_t len = strlen(query_str);
398
+ int pos;
399
+ char *str = ALLOC_N(char, len + 40);
400
+
401
+ if (self->doc(self) < 0) {
402
+ sprintf(pos_str, "START");
403
+ }
404
+ else {
405
+ if (self->doc(self) == INT_MAX) {
406
+ sprintf(pos_str, "END");
407
+ }
408
+ else {
409
+ pos = SpTEn(self)->position;
410
+ sprintf(pos_str, "%d", self->doc(self) - pos);
411
+ }
412
+ }
413
+ sprintf(str, "SpanTermEnum(%s)@%s", query_str, pos_str);
414
+ free(query_str);
415
+ return str;
416
+ }
417
+
418
+ static void spante_destroy(SpanEnum *self)
419
+ {
420
+ TermDocEnum *tde = SpTEn(self)->positions;
421
+ tde->close(tde);
422
+ free(self);
423
+ }
424
+
425
+ static SpanEnum *spante_new(Query *query, IndexReader *ir)
426
+ {
427
+ char *term = SpTQ(query)->term;
428
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanTermEnum);
429
+
430
+ SpTEn(self)->positions = ir_term_positions_for(ir, SpQ(query)->field,
431
+ term);
432
+ SpTEn(self)->position = -1;
433
+ SpTEn(self)->doc = -1;
434
+ SpTEn(self)->count = 0;
435
+ SpTEn(self)->freq = 0;
436
+
437
+ self->query = query;
438
+ self->next = &spante_next;
439
+ self->skip_to = &spante_skip_to;
440
+ self->doc = &spante_doc;
441
+ self->start = &spante_start;
442
+ self->end = &spante_end;
443
+ self->destroy = &spante_destroy;
444
+ self->to_s = &spante_to_s;
445
+
446
+ return self;
447
+ }
448
+
449
+ /*****************************************************************************
450
+ * SpanMultiTermEnum
451
+ *****************************************************************************/
452
+
453
+ /* * TermPosEnumWrapper * */
454
+ #define TPE_READ_SIZE 16
455
+
456
+ typedef struct TermPosEnumWrapper /* caches the current doc/position of one term's position enumerator */
457
+ {
458
+ const char *term; /* term text this wrapper was created for (pointer stored, not copied) */
459
+ TermDocEnum *tpe; /* wrapped position enumerator; closed by tpew_destroy */
460
+ int doc; /* document last read from tpe, -1 initially */
461
+ int pos; /* position last read from tpe, -1 initially */
462
+ } TermPosEnumWrapper;
463
+
464
+ static bool tpew_less_than(const TermPosEnumWrapper *tpew1,
465
+ const TermPosEnumWrapper *tpew2)
466
+ {
467
+ return (tpew1->doc < tpew2->doc)
468
+ || (tpew1->doc == tpew2->doc && tpew1->pos < tpew2->pos);
469
+ }
470
+
471
+ static bool tpew_next(TermPosEnumWrapper *self)
472
+ {
473
+ TermDocEnum *tpe = self->tpe;
474
+ if (0 > (self->pos = tpe->next_position(tpe))) {
475
+ if (!tpe->next(tpe)) return false;
476
+ self->doc = tpe->doc_num(tpe);
477
+ self->pos = tpe->next_position(tpe);
478
+ }
479
+ return true;
480
+ }
481
+
482
+ static bool tpew_skip_to(TermPosEnumWrapper *self, int doc_num)
483
+ {
484
+ TermDocEnum *tpe = self->tpe;
485
+
486
+ if (tpe->skip_to(tpe, doc_num)) {
487
+ self->doc = tpe->doc_num(tpe);
488
+ self->pos = tpe->next_position(tpe);
489
+ return true;
490
+ }
491
+ else {
492
+ return false;
493
+ }
494
+ }
495
+
496
+ static void tpew_destroy(TermPosEnumWrapper *self)
497
+ {
498
+ self->tpe->close(self->tpe);
499
+ free(self);
500
+ }
501
+
502
+ static TermPosEnumWrapper *tpew_new(const char *term, TermDocEnum *tpe)
503
+ {
504
+ TermPosEnumWrapper *self = ALLOC_AND_ZERO(TermPosEnumWrapper);
505
+ self->term = term;
506
+ self->tpe = tpe;
507
+ self->doc = -1;
508
+ self->pos = -1;
509
+ return self;
510
+ }
511
+ #define SpMTEn(span_enum) ((SpanMultiTermEnum *)(span_enum))
512
+ #define SpMTQ(query) ((SpanMultiTermQuery *)(query))
513
+
514
+ typedef struct SpanMultiTermEnum /* SpanEnum merging the positions of several terms */
515
+ {
516
+ SpanEnum super; /* base enumerator; must stay first so the struct can be cast to SpanEnum */
517
+ PriorityQueue *tpew_pq; /* wrappers ordered by (doc, pos); NULL until the first next/skip_to */
518
+ TermPosEnumWrapper **tpews; /* one wrapper per query term; owned, destroyed by spanmte_destroy */
519
+ int tpew_cnt; /* number of entries in tpews */
520
+ int pos; /* position of the current span, -1 initially */
521
+ int doc; /* document of the current span, -1 initially */
522
+ } SpanMultiTermEnum;
523
+
524
+ static bool spanmte_next(SpanEnum *self)
525
+ {
526
+ int curr_doc, curr_pos;
527
+ TermPosEnumWrapper *tpew;
528
+ SpanMultiTermEnum *mte = SpMTEn(self);
529
+ PriorityQueue *tpew_pq = mte->tpew_pq;
530
+ if (tpew_pq == NULL) {
531
+ TermPosEnumWrapper **tpews = mte->tpews;
532
+ int i;
533
+ tpew_pq = pq_new(mte->tpew_cnt, (lt_ft)tpew_less_than, (free_ft)NULL);
534
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
535
+ if (tpew_next(tpews[i])) {
536
+ pq_push(tpew_pq, tpews[i]);
537
+ }
538
+ }
539
+ mte->tpew_pq = tpew_pq;
540
+ }
541
+
542
+ tpew = (TermPosEnumWrapper *)pq_top(tpew_pq);
543
+ if (tpew == NULL) {
544
+ return false;
545
+ }
546
+
547
+ mte->doc = curr_doc = tpew->doc;
548
+ mte->pos = curr_pos = tpew->pos;
549
+
550
+ do {
551
+ if (tpew_next(tpew)) {
552
+ pq_down(tpew_pq);
553
+ }
554
+ else {
555
+ pq_pop(tpew_pq);
556
+ }
557
+ } while (((tpew = (TermPosEnumWrapper *)pq_top(tpew_pq)) != NULL)
558
+ && tpew->doc == curr_doc && tpew->pos == curr_pos);
559
+ return true;
560
+ }
561
+
562
+ static bool spanmte_skip_to(SpanEnum *self, int target)
563
+ {
564
+ SpanMultiTermEnum *mte = SpMTEn(self);
565
+ PriorityQueue *tpew_pq = mte->tpew_pq;
566
+ TermPosEnumWrapper *tpew;
567
+ if (tpew_pq == NULL) {
568
+ TermPosEnumWrapper **tpews = mte->tpews;
569
+ int i;
570
+ tpew_pq = pq_new(mte->tpew_cnt, (lt_ft)tpew_less_than, (free_ft)NULL);
571
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
572
+ tpew_skip_to(tpews[i], target);
573
+ pq_push(tpew_pq, tpews[i]);
574
+ }
575
+ mte->tpew_pq = tpew_pq;
576
+ }
577
+ if (tpew_pq->size == 0) {
578
+ mte->doc = -1;
579
+ return false;
580
+ }
581
+ while ((tpew = (TermPosEnumWrapper *)pq_top(tpew_pq)) != NULL
582
+ && (target > tpew->doc)) {
583
+ if (tpew_skip_to(tpew, target)) {
584
+ pq_down(tpew_pq);
585
+ }
586
+ else {
587
+ pq_pop(tpew_pq);
588
+ }
589
+ }
590
+ return spanmte_next(self);
591
+ }
592
+
593
+ static int spanmte_doc(SpanEnum *self)
594
+ {
595
+ return SpMTEn(self)->doc;
596
+ }
597
+
598
+ static int spanmte_start(SpanEnum *self)
599
+ {
600
+ return SpMTEn(self)->pos;
601
+ }
602
+
603
+ static int spanmte_end(SpanEnum *self)
604
+ {
605
+ return SpMTEn(self)->pos + 1;
606
+ }
607
+
608
+ static void spanmte_destroy(SpanEnum *self)
609
+ {
610
+ SpanMultiTermEnum *mte = SpMTEn(self);
611
+ int i;
612
+ if (mte->tpew_pq) pq_destroy(mte->tpew_pq);
613
+ for (i = 0; i < mte->tpew_cnt; i++) {
614
+ tpew_destroy(mte->tpews[i]);
615
+ }
616
+ free(mte->tpews);
617
+ free(self);
618
+ }
619
+
620
+ static SpanEnum *spanmte_new(Query *query, IndexReader *ir)
621
+ {
622
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanMultiTermEnum);
623
+ SpanMultiTermEnum *smte = SpMTEn(self);
624
+ SpanMultiTermQuery *smtq = SpMTQ(query);
625
+ int i;
626
+
627
+
628
+ smte->tpews = ALLOC_N(TermPosEnumWrapper *, smtq->term_cnt);
629
+ for (i = 0; i < smtq->term_cnt; i++) {
630
+ char *term = smtq->terms[i];
631
+ smte->tpews[i] = tpew_new(term,
632
+ ir_term_positions_for(ir, SpQ(query)->field, term));
633
+ }
634
+ smte->tpew_cnt = smtq->term_cnt;
635
+ smte->tpew_pq = NULL;
636
+ smte->pos = -1;
637
+ smte->doc = -1;
638
+
639
+ self->query = query;
640
+ self->next = &spanmte_next;
641
+ self->skip_to = &spanmte_skip_to;
642
+ self->doc = &spanmte_doc;
643
+ self->start = &spanmte_start;
644
+ self->end = &spanmte_end;
645
+ self->destroy = &spanmte_destroy;
646
+ self->to_s = &spante_to_s;
647
+
648
+ return self;
649
+ }
650
+
651
+
652
+ /*****************************************************************************
653
+ * SpanFirstEnum
654
+ *****************************************************************************/
655
+
656
+ #define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
657
+ #define SpFQ(query) ((SpanFirstQuery *)(query))
658
+
659
+ typedef struct SpanFirstEnum /* SpanEnum keeping only sub-spans that end at or before the query's end */
660
+ {
661
+ SpanEnum super; /* base enumerator; must stay first so the struct can be cast to SpanEnum */
662
+ SpanEnum *sub_enum; /* spans of the wrapped match query; destroyed by spanfe_destroy */
663
+ } SpanFirstEnum;
664
+
665
+
666
+ static bool spanfe_next(SpanEnum *self)
667
+ {
668
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
669
+ int end = SpFQ(self->query)->end;
670
+ while (sub_enum->next(sub_enum)) { /* scan to next match */
671
+ if (sub_enum->end(sub_enum) <= end) {
672
+ return true;
673
+ }
674
+ }
675
+ return false;
676
+ }
677
+
678
+ static bool spanfe_skip_to(SpanEnum *self, int target)
679
+ {
680
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
681
+ int end = SpFQ(self->query)->end;
682
+
683
+ if (! sub_enum->skip_to(sub_enum, target)) {
684
+ return false;
685
+ }
686
+
687
+ if (sub_enum->end(sub_enum) <= end) { /* there is a match */
688
+ return true;
689
+ }
690
+
691
+ return spanfe_next(self); /* scan to next match */
692
+ }
693
+
694
+ static int spanfe_doc(SpanEnum *self)
695
+ {
696
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
697
+ return sub_enum->doc(sub_enum);
698
+ }
699
+
700
+ static int spanfe_start(SpanEnum *self)
701
+ {
702
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
703
+ return sub_enum->start(sub_enum);
704
+ }
705
+
706
+ static int spanfe_end(SpanEnum *self)
707
+ {
708
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
709
+ return sub_enum->end(sub_enum);
710
+ }
711
+
712
+ static char *spanfe_to_s(SpanEnum *self)
713
+ {
714
+ char *query_str = self->query->to_s(self->query, NULL);
715
+ char *res = strfmt("SpanFirstEnum(%s)", query_str);
716
+ free(query_str);
717
+ return res;
718
+ }
719
+
720
+ static void spanfe_destroy(SpanEnum *self)
721
+ {
722
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
723
+ sub_enum->destroy(sub_enum);
724
+ free(self);
725
+ }
726
+
727
+ static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
728
+ {
729
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanFirstEnum);
730
+ SpanFirstQuery *sfq = SpFQ(query);
731
+
732
+ SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
733
+
734
+ self->query = query;
735
+ self->next = &spanfe_next;
736
+ self->skip_to = &spanfe_skip_to;
737
+ self->doc = &spanfe_doc;
738
+ self->start = &spanfe_start;
739
+ self->end = &spanfe_end;
740
+ self->destroy = &spanfe_destroy;
741
+ self->to_s = &spanfe_to_s;
742
+
743
+ return self;
744
+ }
745
+
746
+
747
+ /*****************************************************************************
748
+ * SpanOrEnum
749
+ *****************************************************************************/
750
+
751
+ #define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
752
+ #define SpOQ(query) ((SpanOrQuery *)(query))
753
+
754
+ typedef struct SpanOrEnum /* SpanEnum merging the spans of several clause enumerators */
755
+ {
756
+ SpanEnum super; /* base enumerator; must stay first so the struct can be cast to SpanEnum */
757
+ PriorityQueue *queue; /* live clause enumerators; the top one supplies the current span */
758
+ SpanEnum **span_enums; /* all clause enumerators */
759
+ int s_cnt; /* number of entries in span_enums */
760
+ bool first_time : 1; /* true until the first next/skip_to has filled the queue */
761
+ } SpanOrEnum;
762
+
763
+
764
+ static bool span_less_than(SpanEnum *s1, SpanEnum *s2)
765
+ {
766
+ int doc_diff, start_diff;
767
+ doc_diff = s1->doc(s1) - s2->doc(s2);
768
+ if (doc_diff == 0) {
769
+ start_diff = s1->start(s1) - s2->start(s2);
770
+ if (start_diff == 0) {
771
+ return s1->end(s1) < s2->end(s2);
772
+ }
773
+ else {
774
+ return start_diff < 0;
775
+ }
776
+ }
777
+ else {
778
+ return doc_diff < 0;
779
+ }
780
+ }
781
+
782
+ static bool spanoe_next(SpanEnum *self)
783
+ {
784
+ SpanOrEnum *soe = SpOEn(self);
785
+ SpanEnum *se;
786
+ int i;
787
+
788
+ if (soe->first_time) { /* first time -- initialize */
789
+ for (i = 0; i < soe->s_cnt; i++) {
790
+ se = soe->span_enums[i];
791
+ if (se->next(se)) { /* move to first entry */
792
+ pq_push(soe->queue, se);
793
+ }
794
+ }
795
+ soe->first_time = false;
796
+ return soe->queue->size != 0;
797
+ }
798
+
799
+ if (soe->queue->size == 0) {
800
+ return false; /* all done */
801
+ }
802
+
803
+ se = (SpanEnum *)pq_top(soe->queue);
804
+ if (se->next(se)) { /* move to next */
805
+ pq_down(soe->queue);
806
+ return true;
807
+ }
808
+
809
+ pq_pop(soe->queue); /* exhausted a clause */
810
+
811
+ return soe->queue->size != 0;
812
+ }
813
+
814
+ static bool spanoe_skip_to(SpanEnum *self, int target)
815
+ {
816
+ SpanOrEnum *soe = SpOEn(self);
817
+ SpanEnum *se;
818
+ int i;
819
+
820
+ if (soe->first_time) { /* first time -- initialize */
821
+ for (i = 0; i < soe->s_cnt; i++) {
822
+ se = soe->span_enums[i];
823
+ if (se->skip_to(se, target)) {/* move to target */
824
+ pq_push(soe->queue, se);
825
+ }
826
+ }
827
+ soe->first_time = false;
828
+ }
829
+ else {
830
+ while ((soe->queue->size != 0) &&
831
+ ((se = (SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
832
+ if (se->skip_to(se, target)) {
833
+ pq_down(soe->queue);
834
+ }
835
+ else {
836
+ pq_pop(soe->queue);
837
+ }
838
+ }
839
+ }
840
+
841
+ return soe->queue->size != 0;
842
+ }
843
+
844
/*
 * Sub-enumerator currently at the top of a SpanOrEnum's queue. The whole
 * expansion is parenthesized so the macro can be used safely inside larger
 * expressions (e.g. followed by `->`).
 */
#define SpOEn_Top_SE(self) ((SpanEnum *)pq_top(SpOEn(self)->queue))
845
+
846
+ static int spanoe_doc(SpanEnum *self)
847
+ {
848
+ SpanEnum *se = SpOEn_Top_SE(self);
849
+ return se->doc(se);
850
+ }
851
+
852
+ static int spanoe_start(SpanEnum *self)
853
+ {
854
+ SpanEnum *se = SpOEn_Top_SE(self);
855
+ return se->start(se);
856
+ }
857
+
858
+ static int spanoe_end(SpanEnum *self)
859
+ {
860
+ SpanEnum *se = SpOEn_Top_SE(self);
861
+ return se->end(se);
862
+ }
863
+
864
+ static char *spanoe_to_s(SpanEnum *self)
865
+ {
866
+ SpanOrEnum *soe = SpOEn(self);
867
+ char *query_str = self->query->to_s(self->query, NULL);
868
+ char doc_str[62];
869
+ size_t len = strlen(query_str);
870
+ char *str = ALLOC_N(char, len + 80);
871
+
872
+ if (soe->first_time) {
873
+ sprintf(doc_str, "START");
874
+ }
875
+ else {
876
+ if (soe->queue->size == 0) {
877
+ sprintf(doc_str, "END");
878
+ }
879
+ else {
880
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
881
+ self->start(self), self->end(self));
882
+ }
883
+ }
884
+ sprintf(str, "SpanOrEnum(%s)@%s", query_str, doc_str);
885
+ free(query_str);
886
+ return str;
887
+ }
888
+
889
+ static void spanoe_destroy(SpanEnum *self)
890
+ {
891
+ SpanEnum *se;
892
+ SpanOrEnum *soe = SpOEn(self);
893
+ int i;
894
+ pq_destroy(soe->queue);
895
+ for (i = 0; i < soe->s_cnt; i++) {
896
+ se = soe->span_enums[i];
897
+ se->destroy(se);
898
+ }
899
+ free(soe->span_enums);
900
+ free(self);
901
+ }
902
+
903
+ static SpanEnum *spanoe_new(Query *query, IndexReader *ir)
904
+ {
905
+ Query *clause;
906
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanOrEnum);
907
+ SpanOrQuery *soq = SpOQ(query);
908
+ int i;
909
+
910
+ SpOEn(self)->first_time = true;
911
+ SpOEn(self)->s_cnt = soq->c_cnt;
912
+ SpOEn(self)->span_enums = ALLOC_N(SpanEnum *, SpOEn(self)->s_cnt);
913
+
914
+ for (i = 0; i < SpOEn(self)->s_cnt; i++) {
915
+ clause = soq->clauses[i];
916
+ SpOEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
917
+ }
918
+
919
+ SpOEn(self)->queue = pq_new(SpOEn(self)->s_cnt, (lt_ft)&span_less_than,
920
+ (free_ft)NULL);
921
+
922
+ self->query = query;
923
+ self->next = &spanoe_next;
924
+ self->skip_to = &spanoe_skip_to;
925
+ self->doc = &spanoe_doc;
926
+ self->start = &spanoe_start;
927
+ self->end = &spanoe_end;
928
+ self->destroy = &spanoe_destroy;
929
+ self->to_s = &spanoe_to_s;
930
+
931
+ return self;
932
+ }
933
+
934
+ /*****************************************************************************
935
+ * SpanNearEnum
936
+ *****************************************************************************/
937
+
938
+ #define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
939
+ #define SpNQ(query) ((SpanNearQuery *)(query))
940
+
941
+ typedef struct SpanNearEnum
942
+ {
943
+ SpanEnum super;
944
+ SpanEnum **span_enums;
945
+ int s_cnt;
946
+ int slop;
947
+ int current;
948
+ int doc;
949
+ int start;
950
+ int end;
951
+ bool first_time : 1;
952
+ bool in_order : 1;
953
+ } SpanNearEnum;
954
+
955
+
956
/*
 * Step `sne->current` to the next sub-enumerator (wrapping around) and load
 * it into `se`. Expects locals named `sne` (SpanNearEnum *) and `se`
 * (SpanEnum *) at the point of use.
 *
 * No trailing semicolon after `while (0)`: the caller's own `;` terminates
 * the statement, which keeps the macro safe in unbraced if/else bodies.
 */
#define SpNEn_NEXT() do {\
    sne->current = (sne->current+1) % sne->s_cnt;\
    se = sne->span_enums[sne->current];\
} while (0)
960
+
961
+ static bool sne_init(SpanNearEnum *sne)
962
+ {
963
+ SpanEnum *se = sne->span_enums[sne->current];
964
+ int prev_doc = se->doc(se);
965
+ int i;
966
+
967
+ for (i = 1; i < sne->s_cnt; i++) {
968
+ SpNEn_NEXT();
969
+ if (!se->skip_to(se, prev_doc)) {
970
+ return false;
971
+ }
972
+ prev_doc = se->doc(se);
973
+ }
974
+ return true;
975
+ }
976
+
977
+ static bool sne_goto_next_doc(SpanNearEnum *sne)
978
+ {
979
+ SpanEnum *se = sne->span_enums[sne->current];
980
+ int prev_doc = se->doc(se);
981
+
982
+ SpNEn_NEXT();
983
+
984
+ while (se->doc(se) < prev_doc) {
985
+ if (! se->skip_to(se, prev_doc)) {
986
+ return false;
987
+ }
988
+ prev_doc = se->doc(se);
989
+ SpNEn_NEXT();
990
+ }
991
+ return true;
992
+ }
993
+
994
+ static bool sne_next_unordered_match(SpanEnum *self)
995
+ {
996
+ SpanNearEnum *sne = SpNEn(self);
997
+ SpanEnum *se, *min_se = NULL;
998
+ int i;
999
+ int max_end, end, min_start, start, doc;
1000
+ int lengths_sum;
1001
+
1002
+ while (true) {
1003
+ max_end = 0;
1004
+ min_start = INT_MAX;
1005
+ lengths_sum = 0;
1006
+
1007
+ for (i = 0; i < sne->s_cnt; i++) {
1008
+ se = sne->span_enums[i];
1009
+ if ((end=se->end(se)) > max_end) {
1010
+ max_end = end;
1011
+ }
1012
+ if ((start=se->start(se)) < min_start) {
1013
+ min_start = start;
1014
+ min_se = se;
1015
+ sne->current = i; /* current should point to the minimum span */
1016
+ }
1017
+ lengths_sum += end - start;
1018
+ }
1019
+
1020
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
1021
+ /* we have a match */
1022
+ sne->start = min_start;
1023
+ sne->end = max_end;
1024
+ sne->doc = min_se->doc(min_se);
1025
+ return true;
1026
+ }
1027
+
1028
+ /* increment the minimum span_enum and try again */
1029
+ doc = min_se->doc(min_se);
1030
+ if (!min_se->next(min_se)) {
1031
+ return false;
1032
+ }
1033
+ if (doc < min_se->doc(min_se)) {
1034
+ if (!sne_goto_next_doc(sne)) return false;
1035
+ }
1036
+ }
1037
+ }
1038
+
1039
+ static bool sne_next_ordered_match(SpanEnum *self)
1040
+ {
1041
+ SpanNearEnum *sne = SpNEn(self);
1042
+ SpanEnum *se;
1043
+ int i;
1044
+ int prev_doc, prev_start, prev_end;
1045
+ int doc=0, start=0, end=0;
1046
+ int lengths_sum;
1047
+
1048
+ while (true) {
1049
+ se = sne->span_enums[0];
1050
+
1051
+ prev_doc = se->doc(se);
1052
+ sne->start = prev_start = se->start(se);
1053
+ prev_end = se->end(se);
1054
+
1055
+ i = 1;
1056
+ lengths_sum = prev_end - prev_start;
1057
+
1058
+ while (i < sne->s_cnt) {
1059
+ se = sne->span_enums[i];
1060
+ doc = se->doc(se);
1061
+ start = se->start(se);
1062
+ end = se->end(se);
1063
+ while ((doc == prev_doc) && ((start < prev_start) ||
1064
+ ((start == prev_start) && (end < prev_end)))) {
1065
+ if (!se->next(se)) {
1066
+ return false;
1067
+ }
1068
+ doc = se->doc(se);
1069
+ start = se->start(se);
1070
+ end = se->end(se);
1071
+ }
1072
+ if (doc != prev_doc) {
1073
+ sne->current = i;
1074
+ if (!sne_goto_next_doc(sne)) {
1075
+ return false;
1076
+ }
1077
+ break;
1078
+ }
1079
+ i++;
1080
+ lengths_sum += end - start;
1081
+ prev_doc = doc;
1082
+ prev_start = start;
1083
+ prev_end = end;
1084
+ }
1085
+ if (i == sne->s_cnt) {
1086
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
1087
+ /* we have a match */
1088
+ sne->end = end;
1089
+ sne->doc = doc;
1090
+
1091
+ /* the minimum span is always the first span so it needs to be
1092
+ * incremented next time around */
1093
+ sne->current = 0;
1094
+ return true;
1095
+
1096
+ }
1097
+ else {
1098
+ se = sne->span_enums[0];
1099
+ if (!se->next(se)) {
1100
+ return false;
1101
+ }
1102
+ if (se->doc(se) != prev_doc) {
1103
+ sne->current = 0;
1104
+ if (!sne_goto_next_doc(sne)) {
1105
+ return false;
1106
+ }
1107
+ }
1108
+ }
1109
+ }
1110
+ }
1111
+ }
1112
+
1113
+ static bool sne_next_match(SpanEnum *self)
1114
+ {
1115
+ SpanNearEnum *sne = SpNEn(self);
1116
+ SpanEnum *se_curr, *se_next;
1117
+
1118
+ if (!sne->first_time) {
1119
+ if (!sne_init(sne)) {
1120
+ return false;
1121
+ }
1122
+ sne->first_time = false;
1123
+ }
1124
+ se_curr = sne->span_enums[sne->current];
1125
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
1126
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
1127
+ if (!sne_goto_next_doc(sne)) {
1128
+ return false;
1129
+ }
1130
+ }
1131
+
1132
+ if (sne->in_order) {
1133
+ return sne_next_ordered_match(self);
1134
+ }
1135
+ else {
1136
+ return sne_next_unordered_match(self);
1137
+ }
1138
+ }
1139
+
1140
+ static bool spanne_next(SpanEnum *self)
1141
+ {
1142
+ SpanNearEnum *sne = SpNEn(self);
1143
+ SpanEnum *se;
1144
+
1145
+ se = sne->span_enums[sne->current];
1146
+ if (!se->next(se)) return false;
1147
+
1148
+ return sne_next_match(self);
1149
+ }
1150
+
1151
+ static bool spanne_skip_to(SpanEnum *self, int target)
1152
+ {
1153
+ SpanEnum *se = SpNEn(self)->span_enums[SpNEn(self)->current];
1154
+ if (!se->skip_to(se, target)) {
1155
+ return false;
1156
+ }
1157
+
1158
+ return sne_next_match(self);
1159
+ }
1160
+
1161
+ static int spanne_doc(SpanEnum *self)
1162
+ {
1163
+ return SpNEn(self)->doc;
1164
+ }
1165
+
1166
+ static int spanne_start(SpanEnum *self)
1167
+ {
1168
+ return SpNEn(self)->start;
1169
+ }
1170
+
1171
+ static int spanne_end(SpanEnum *self)
1172
+ {
1173
+ return SpNEn(self)->end;
1174
+ }
1175
+
1176
+ static char *spanne_to_s(SpanEnum *self)
1177
+ {
1178
+ SpanNearEnum *sne = SpNEn(self);
1179
+ char *query_str = self->query->to_s(self->query, NULL);
1180
+ char doc_str[62];
1181
+ size_t len = strlen(query_str);
1182
+ char *str = ALLOC_N(char, len + 80);
1183
+
1184
+ if (sne->first_time) {
1185
+ sprintf(doc_str, "START");
1186
+ }
1187
+ else {
1188
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
1189
+ self->start(self), self->end(self));
1190
+ }
1191
+ sprintf(str, "SpanNearEnum(%s)@%s", query_str, doc_str);
1192
+ free(query_str);
1193
+ return str;
1194
+ }
1195
+
1196
+ static void spanne_destroy(SpanEnum *self)
1197
+ {
1198
+ SpanEnum *se;
1199
+ SpanNearEnum *sne = SpNEn(self);
1200
+ int i;
1201
+ for (i = 0; i < sne->s_cnt; i++) {
1202
+ se = sne->span_enums[i];
1203
+ se->destroy(se);
1204
+ }
1205
+ free(sne->span_enums);
1206
+ free(self);
1207
+ }
1208
+
1209
+ static SpanEnum *spanne_new(Query *query, IndexReader *ir)
1210
+ {
1211
+ int i;
1212
+ Query *clause;
1213
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanNearEnum);
1214
+ SpanNearQuery *snq = SpNQ(query);
1215
+
1216
+ SpNEn(self)->first_time = true;
1217
+ SpNEn(self)->in_order = snq->in_order;
1218
+ SpNEn(self)->slop = snq->slop;
1219
+ SpNEn(self)->s_cnt = snq->c_cnt;
1220
+ SpNEn(self)->span_enums = ALLOC_N(SpanEnum *, SpNEn(self)->s_cnt);
1221
+
1222
+ for (i = 0; i < SpNEn(self)->s_cnt; i++) {
1223
+ clause = snq->clauses[i];
1224
+ SpNEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
1225
+ }
1226
+ SpNEn(self)->current = 0;
1227
+
1228
+ SpNEn(self)->doc = -1;
1229
+ SpNEn(self)->start = -1;
1230
+ SpNEn(self)->end = -1;
1231
+
1232
+ self->query = query;
1233
+ self->next = &spanne_next;
1234
+ self->skip_to = &spanne_skip_to;
1235
+ self->doc = &spanne_doc;
1236
+ self->start = &spanne_start;
1237
+ self->end = &spanne_end;
1238
+ self->destroy = &spanne_destroy;
1239
+ self->to_s = &spanne_to_s;
1240
+
1241
+ return self;
1242
+ }
1243
+
1244
+ /*****************************************************************************
1245
+ *
1246
+ * SpanNotEnum
1247
+ *
1248
+ *****************************************************************************/
1249
+
1250
+ #define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
1251
+ #define SpXQ(query) ((SpanNotQuery *)(query))
1252
+
1253
+ typedef struct SpanNotEnum
1254
+ {
1255
+ SpanEnum super;
1256
+ SpanEnum *inc;
1257
+ SpanEnum *exc;
1258
+ bool more_inc : 1;
1259
+ bool more_exc : 1;
1260
+ } SpanNotEnum;
1261
+
1262
+
1263
+ static bool spanxe_next(SpanEnum *self)
1264
+ {
1265
+ SpanNotEnum *sxe = SpXEn(self);
1266
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1267
+ if (sxe->more_inc) { /* move to next incl */
1268
+ sxe->more_inc = inc->next(inc);
1269
+ }
1270
+
1271
+ while (sxe->more_inc && sxe->more_exc) {
1272
+ if (inc->doc(inc) > exc->doc(exc)) { /* skip excl */
1273
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
1274
+ }
1275
+
1276
+ while (sxe->more_exc /* while excl is before */
1277
+ && (inc->doc(inc) == exc->doc(exc))
1278
+ && (exc->end(exc) <= inc->start(inc))) {
1279
+ sxe->more_exc = exc->next(exc); /* increment excl */
1280
+ }
1281
+
1282
+ if (! sxe->more_exc || /* if no intersection */
1283
+ (inc->doc(inc) != exc->doc(exc)) ||
1284
+ inc->end(inc) <= exc->start(exc)) {
1285
+ break; /* we found a match */
1286
+ }
1287
+
1288
+ sxe->more_inc = inc->next(inc); /* intersected: keep scanning */
1289
+ }
1290
+ return sxe->more_inc;
1291
+ }
1292
+
1293
+ static bool spanxe_skip_to(SpanEnum *self, int target)
1294
+ {
1295
+ SpanNotEnum *sxe = SpXEn(self);
1296
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1297
+ int doc;
1298
+
1299
+ if (sxe->more_inc) { /* move to next incl */
1300
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
1301
+ }
1302
+
1303
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
1304
+ sxe->more_exc = exc->skip_to(exc, doc);
1305
+ }
1306
+
1307
+ while (sxe->more_exc /* while excl is before */
1308
+ && inc->doc(inc) == exc->doc(exc)
1309
+ && exc->end(exc) <= inc->start(inc)) {
1310
+ sxe->more_exc = exc->next(exc); /* increment excl */
1311
+ }
1312
+
1313
+ if (!sxe->more_exc || /* if no intersection */
1314
+ inc->doc(inc) != exc->doc(exc) ||
1315
+ inc->end(inc) <= exc->start(exc)) {
1316
+ return true; /* we found a match */
1317
+ }
1318
+
1319
+ return spanxe_next(self); /* scan to next match */
1320
+ }
1321
+
1322
+ static int spanxe_doc(SpanEnum *self)
1323
+ {
1324
+ SpanEnum *inc = SpXEn(self)->inc;
1325
+ return inc->doc(inc);
1326
+ }
1327
+
1328
+ static int spanxe_start(SpanEnum *self)
1329
+ {
1330
+ SpanEnum *inc = SpXEn(self)->inc;
1331
+ return inc->start(inc);
1332
+ }
1333
+
1334
+ static int spanxe_end(SpanEnum *self)
1335
+ {
1336
+ SpanEnum *inc = SpXEn(self)->inc;
1337
+ return inc->end(inc);
1338
+ }
1339
+
1340
+ static char *spanxe_to_s(SpanEnum *self)
1341
+ {
1342
+ char *query_str = self->query->to_s(self->query, NULL);
1343
+ char *res = strfmt("SpanNotEnum(%s)", query_str);
1344
+ free(query_str);
1345
+ return res;
1346
+ }
1347
+
1348
+ static void spanxe_destroy(SpanEnum *self)
1349
+ {
1350
+ SpanNotEnum *sxe = SpXEn(self);
1351
+ sxe->inc->destroy(sxe->inc);
1352
+ sxe->exc->destroy(sxe->exc);
1353
+ free(self);
1354
+ }
1355
+
1356
+ static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
1357
+ {
1358
+ SpanEnum *self = (SpanEnum *)ALLOC(SpanNotEnum);
1359
+ SpanNotEnum *sxe = SpXEn(self);
1360
+ SpanNotQuery *sxq = SpXQ(query);
1361
+
1362
+ sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
1363
+ sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
1364
+ sxe->more_inc = true;
1365
+ sxe->more_exc = sxe->exc->next(sxe->exc);
1366
+
1367
+ self->query = query;
1368
+ self->next = &spanxe_next;
1369
+ self->skip_to = &spanxe_skip_to;
1370
+ self->doc = &spanxe_doc;
1371
+ self->start = &spanxe_start;
1372
+ self->end = &spanxe_end;
1373
+ self->destroy = &spanxe_destroy;
1374
+ self->to_s = &spanxe_to_s;
1375
+
1376
+ return self;
1377
+ }
1378
+
1379
+ /*****************************************************************************
1380
+ *
1381
+ * SpanWeight
1382
+ *
1383
+ *****************************************************************************/
1384
+
1385
+ #define SpW(weight) ((SpanWeight *)(weight))
1386
+ typedef struct SpanWeight
1387
+ {
1388
+ Weight super;
1389
+ HashSet *terms;
1390
+ } SpanWeight;
1391
+
1392
+ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
1393
+ {
1394
+ Explanation *expl;
1395
+ Explanation *idf_expl1;
1396
+ Explanation *idf_expl2;
1397
+ Explanation *query_expl;
1398
+ Explanation *qnorm_expl;
1399
+ Explanation *field_expl;
1400
+ Explanation *tf_expl;
1401
+ Scorer *scorer;
1402
+ uchar *field_norms;
1403
+ float field_norm;
1404
+ Explanation *field_norm_expl;
1405
+ const char *field = S(SpQ(self->query)->field);
1406
+
1407
+ char *query_str;
1408
+ HashSet *terms = SpW(self)->terms;
1409
+ const int field_num = fis_get_field_num(ir->fis, SpQ(self->query)->field);
1410
+ char *doc_freqs = NULL;
1411
+ size_t df_i = 0;
1412
+ HashSetEntry *hse;
1413
+
1414
+ if (field_num < 0) {
1415
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
1416
+ }
1417
+
1418
+ query_str = self->query->to_s(self->query, NULL);
1419
+
1420
+ for (hse = terms->first; hse; hse = hse->next) {
1421
+ char *term = (char *)hse->elem;
1422
+ REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
1423
+ df_i += sprintf(doc_freqs + df_i, "%s=%d, ", term,
1424
+ ir->doc_freq(ir, field_num, term));
1425
+ }
1426
+ /* remove the ',' at the end of the string if it exists */
1427
+ if (terms->size > 0) {
1428
+ df_i -= 2;
1429
+ doc_freqs[df_i] = '\0';
1430
+ }
1431
+ else {
1432
+ doc_freqs = "";
1433
+ }
1434
+
1435
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
1436
+
1437
+ /* We need two of these as it's included in both the query explanation
1438
+ * and the field explanation */
1439
+ idf_expl1 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1440
+ idf_expl2 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1441
+ if (terms->size > 0) {
1442
+ free(doc_freqs); /* only free if allocated */
1443
+ }
1444
+
1445
+ /* explain query weight */
1446
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
1447
+
1448
+ if (self->query->boost != 1.0) {
1449
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
1450
+ }
1451
+
1452
+ expl_add_detail(query_expl, idf_expl1);
1453
+
1454
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
1455
+ expl_add_detail(query_expl, qnorm_expl);
1456
+
1457
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
1458
+
1459
+ expl_add_detail(expl, query_expl);
1460
+
1461
+ /* explain field weight */
1462
+ field_expl = expl_new(0.0, "field_weight(%s:%s in %d), product of:",
1463
+ field, query_str, target);
1464
+ free(query_str);
1465
+
1466
+ scorer = self->scorer(self, ir);
1467
+ tf_expl = scorer->explain(scorer, target);
1468
+ scorer->destroy(scorer);
1469
+ expl_add_detail(field_expl, tf_expl);
1470
+ expl_add_detail(field_expl, idf_expl2);
1471
+
1472
+ field_norms = ir->get_norms(ir, field_num);
1473
+ field_norm = (field_norms
1474
+ ? sim_decode_norm(self->similarity, field_norms[target])
1475
+ : (float)0.0);
1476
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
1477
+ field, target);
1478
+ expl_add_detail(field_expl, field_norm_expl);
1479
+
1480
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
1481
+
1482
+ /* combine them */
1483
+ if (query_expl->value == 1.0) {
1484
+ expl_destroy(expl);
1485
+ return field_expl;
1486
+ }
1487
+ else {
1488
+ expl->value = (query_expl->value * field_expl->value);
1489
+ expl_add_detail(expl, field_expl);
1490
+ return expl;
1491
+ }
1492
+ }
1493
+
1494
+ static char *spanw_to_s(Weight *self)
1495
+ {
1496
+ return strfmt("SpanWeight(%f)", self->value);
1497
+ }
1498
+
1499
+ static void spanw_destroy(Weight *self)
1500
+ {
1501
+ hs_destroy(SpW(self)->terms);
1502
+ w_destroy(self);
1503
+ }
1504
+
1505
+ static Weight *spanw_new(Query *query, Searcher *searcher)
1506
+ {
1507
+ HashSetEntry *hse;
1508
+ Weight *self = w_new(SpanWeight, query);
1509
+ HashSet *terms = SpQ(query)->get_terms(query);
1510
+
1511
+ SpW(self)->terms = terms;
1512
+ self->scorer = &spansc_new;
1513
+ self->explain = &spanw_explain;
1514
+ self->to_s = &spanw_to_s;
1515
+ self->destroy = &spanw_destroy;
1516
+
1517
+ self->similarity = query->get_similarity(query, searcher);
1518
+
1519
+ self->idf = 0.0;
1520
+
1521
+ for (hse = terms->first; hse; hse = hse->next) {
1522
+ self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
1523
+ (char *)hse->elem, searcher);
1524
+ }
1525
+
1526
+ return self;
1527
+ }
1528
+
1529
+ /*****************************************************************************
1530
+ * SpanTermQuery
1531
+ *****************************************************************************/
1532
+
1533
+ static char *spantq_to_s(Query *self, Symbol default_field)
1534
+ {
1535
+ if (default_field && default_field == SpQ(self)->field) {
1536
+ return strfmt("span_terms(%s)", SpTQ(self)->term);
1537
+ }
1538
+ else {
1539
+ return strfmt("span_terms(%s:%s)", S(SpQ(self)->field), SpTQ(self)->term);
1540
+ }
1541
+ }
1542
+
1543
+ static void spantq_destroy_i(Query *self)
1544
+ {
1545
+ free(SpTQ(self)->term);
1546
+ spanq_destroy_i(self);
1547
+ }
1548
+
1549
+ static void spantq_extract_terms(Query *self, HashSet *terms)
1550
+ {
1551
+ hs_add(terms, term_new(SpQ(self)->field, SpTQ(self)->term));
1552
+ }
1553
+
1554
+ static HashSet *spantq_get_terms(Query *self)
1555
+ {
1556
+ HashSet *terms = hs_new_str(&free);
1557
+ hs_add(terms, estrdup(SpTQ(self)->term));
1558
+ return terms;
1559
+ }
1560
+
1561
+ static unsigned long spantq_hash(Query *self)
1562
+ {
1563
+ return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
1564
+ }
1565
+
1566
+ static int spantq_eq(Query *self, Query *o)
1567
+ {
1568
+ return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
1569
+ }
1570
+
1571
+ Query *spantq_new(Symbol field, const char *term)
1572
+ {
1573
+ Query *self = q_new(SpanTermQuery);
1574
+
1575
+ SpTQ(self)->term = estrdup(term);
1576
+ SpQ(self)->field = field;
1577
+ SpQ(self)->get_spans = &spante_new;
1578
+ SpQ(self)->get_terms = &spantq_get_terms;
1579
+
1580
+ self->type = SPAN_TERM_QUERY;
1581
+ self->extract_terms = &spantq_extract_terms;
1582
+ self->to_s = &spantq_to_s;
1583
+ self->hash = &spantq_hash;
1584
+ self->eq = &spantq_eq;
1585
+ self->destroy_i = &spantq_destroy_i;
1586
+ self->create_weight_i = &spanw_new;
1587
+ self->get_matchv_i = &spanq_get_matchv_i;
1588
+ return self;
1589
+ }
1590
+
1591
+ /*****************************************************************************
1592
+ * SpanMultiTermQuery
1593
+ *****************************************************************************/
1594
+
1595
+ static char *spanmtq_to_s(Query *self, Symbol field)
1596
+ {
1597
+ char *terms = NULL, *p;
1598
+ int len = 3, i;
1599
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1600
+ for (i = 0; i < smtq->term_cnt; i++) {
1601
+ len += strlen(smtq->terms[i]) + 2;
1602
+ }
1603
+ p = terms = ALLOC_N(char, len);
1604
+ *(p++) = '[';
1605
+ for (i = 0; i < smtq->term_cnt; i++) {
1606
+ if (i != 0) *(p++) = ',';
1607
+ strcpy(p, smtq->terms[i]);
1608
+ p += strlen(smtq->terms[i]);
1609
+ }
1610
+ *(p++) = ']';
1611
+ *p = '\0';
1612
+
1613
+ if (field == SpQ(self)->field) {
1614
+ p = strfmt("span_terms(%s)", terms);
1615
+ }
1616
+ else {
1617
+ p = strfmt("span_terms(%s:%s)", S(SpQ(self)->field), terms);
1618
+ }
1619
+ free(terms);
1620
+ return p;
1621
+ }
1622
+
1623
+ static void spanmtq_destroy_i(Query *self)
1624
+ {
1625
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1626
+ int i;
1627
+ for (i = 0; i < smtq->term_cnt; i++) {
1628
+ free(smtq->terms[i]);
1629
+ }
1630
+ free(smtq->terms);
1631
+ spanq_destroy_i(self);
1632
+ }
1633
+
1634
+ static void spanmtq_extract_terms(Query *self, HashSet *terms)
1635
+ {
1636
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1637
+ int i;
1638
+ for (i = 0; i < smtq->term_cnt; i++) {
1639
+ hs_add(terms, term_new(SpQ(self)->field, smtq->terms[i]));
1640
+ }
1641
+ }
1642
+
1643
+ static HashSet *spanmtq_get_terms(Query *self)
1644
+ {
1645
+ HashSet *terms = hs_new_str(&free);
1646
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1647
+ int i;
1648
+ for (i = 0; i < smtq->term_cnt; i++) {
1649
+ hs_add(terms, estrdup(smtq->terms[i]));
1650
+ }
1651
+ return terms;
1652
+ }
1653
+
1654
+ static unsigned long spanmtq_hash(Query *self)
1655
+ {
1656
+ unsigned long hash = spanq_hash(self);
1657
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1658
+ int i;
1659
+ for (i = 0; i < smtq->term_cnt; i++) {
1660
+ hash ^= str_hash(smtq->terms[i]);
1661
+ }
1662
+ return hash;
1663
+ }
1664
+
1665
+ static int spanmtq_eq(Query *self, Query *o)
1666
+ {
1667
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1668
+ SpanMultiTermQuery *smtqo = SpMTQ(o);
1669
+ int i;
1670
+ if (!spanq_eq(self, o)) return false;
1671
+ if (smtq->term_cnt != smtqo->term_cnt) return false;
1672
+ for (i = 0; i < smtq->term_cnt; i++) {
1673
+ if (strcmp(smtq->terms[i], smtqo->terms[i]) != 0) return false;
1674
+ }
1675
+ return true;;
1676
+ }
1677
+
1678
+ Query *spanmtq_new_conf(Symbol field, int max_terms)
1679
+ {
1680
+ Query *self = q_new(SpanMultiTermQuery);
1681
+
1682
+ SpMTQ(self)->terms = ALLOC_N(char *, max_terms);
1683
+ SpMTQ(self)->term_cnt = 0;
1684
+ SpMTQ(self)->term_capa = max_terms;
1685
+
1686
+ SpQ(self)->field = field;
1687
+ SpQ(self)->get_spans = &spanmte_new;
1688
+ SpQ(self)->get_terms = &spanmtq_get_terms;
1689
+
1690
+ self->type = SPAN_MULTI_TERM_QUERY;
1691
+ self->extract_terms = &spanmtq_extract_terms;
1692
+ self->to_s = &spanmtq_to_s;
1693
+ self->hash = &spanmtq_hash;
1694
+ self->eq = &spanmtq_eq;
1695
+ self->destroy_i = &spanmtq_destroy_i;
1696
+ self->create_weight_i = &spanw_new;
1697
+ self->get_matchv_i = &spanq_get_matchv_i;
1698
+
1699
+ return self;
1700
+ }
1701
+
1702
+ Query *spanmtq_new(Symbol field)
1703
+ {
1704
+ return spanmtq_new_conf(field, SPAN_MULTI_TERM_QUERY_CAPA);
1705
+ }
1706
+
1707
+ void spanmtq_add_term(Query *self, const char *term)
1708
+ {
1709
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1710
+ if (smtq->term_cnt < smtq->term_capa) {
1711
+ smtq->terms[smtq->term_cnt++] = estrdup(term);
1712
+ }
1713
+ }
1714
+
1715
+ /*****************************************************************************
1716
+ *
1717
+ * SpanFirstQuery
1718
+ *
1719
+ *****************************************************************************/
1720
+
1721
+ static char *spanfq_to_s(Query *self, Symbol field)
1722
+ {
1723
+ Query *match = SpFQ(self)->match;
1724
+ char *q_str = match->to_s(match, field);
1725
+ char *res = strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
1726
+ free(q_str);
1727
+ return res;
1728
+ }
1729
+
1730
+ static void spanfq_extract_terms(Query *self, HashSet *terms)
1731
+ {
1732
+ SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
1733
+ }
1734
+
1735
+ static HashSet *spanfq_get_terms(Query *self)
1736
+ {
1737
+ SpanFirstQuery *sfq = SpFQ(self);
1738
+ return SpQ(sfq->match)->get_terms(sfq->match);
1739
+ }
1740
+
1741
+ static Query *spanfq_rewrite(Query *self, IndexReader *ir)
1742
+ {
1743
+ Query *q, *rq;
1744
+
1745
+ q = SpFQ(self)->match;
1746
+ rq = q->rewrite(q, ir);
1747
+ q_deref(q);
1748
+ SpFQ(self)->match = rq;
1749
+
1750
+ self->ref_cnt++;
1751
+ return self; /* no clauses rewrote */
1752
+ }
1753
+
1754
+ static void spanfq_destroy_i(Query *self)
1755
+ {
1756
+ q_deref(SpFQ(self)->match);
1757
+ spanq_destroy_i(self);
1758
+ }
1759
+
1760
+ static unsigned long spanfq_hash(Query *self)
1761
+ {
1762
+ return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1763
+ ^ SpFQ(self)->end;
1764
+ }
1765
+
1766
+ static int spanfq_eq(Query *self, Query *o)
1767
+ {
1768
+ SpanFirstQuery *sfq1 = SpFQ(self);
1769
+ SpanFirstQuery *sfq2 = SpFQ(o);
1770
+ return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
1771
+ && (sfq1->end == sfq2->end);
1772
+ }
1773
+
1774
+ Query *spanfq_new_nr(Query *match, int end)
1775
+ {
1776
+ Query *self = q_new(SpanFirstQuery);
1777
+
1778
+ SpFQ(self)->match = match;
1779
+ SpFQ(self)->end = end;
1780
+
1781
+ SpQ(self)->field = SpQ(match)->field;
1782
+ SpQ(self)->get_spans = &spanfe_new;
1783
+ SpQ(self)->get_terms = &spanfq_get_terms;
1784
+
1785
+ self->type = SPAN_FIRST_QUERY;
1786
+ self->rewrite = &spanfq_rewrite;
1787
+ self->extract_terms = &spanfq_extract_terms;
1788
+ self->to_s = &spanfq_to_s;
1789
+ self->hash = &spanfq_hash;
1790
+ self->eq = &spanfq_eq;
1791
+ self->destroy_i = &spanfq_destroy_i;
1792
+ self->create_weight_i = &spanw_new;
1793
+ self->get_matchv_i = &spanq_get_matchv_i;
1794
+
1795
+ return self;
1796
+ }
1797
+
1798
+ Query *spanfq_new(Query *match, int end)
1799
+ {
1800
+ REF(match);
1801
+ return spanfq_new_nr(match, end);
1802
+ }
1803
+
1804
+ /*****************************************************************************
1805
+ *
1806
+ * SpanOrQuery
1807
+ *
1808
+ *****************************************************************************/
1809
+
1810
+ static char *spanoq_to_s(Query *self, Symbol field)
1811
+ {
1812
+ int i;
1813
+ SpanOrQuery *soq = SpOQ(self);
1814
+ char *res, *res_p;
1815
+ char **q_strs = ALLOC_N(char *, soq->c_cnt);
1816
+ int len = 50;
1817
+ for (i = 0; i < soq->c_cnt; i++) {
1818
+ Query *clause = soq->clauses[i];
1819
+ q_strs[i] = clause->to_s(clause, field);
1820
+ len += strlen(q_strs[i]) + 2;
1821
+ }
1822
+
1823
+ res_p = res = ALLOC_N(char, len);
1824
+ res_p += sprintf(res_p, "span_or[");
1825
+ for (i = 0; i < soq->c_cnt; i++) {
1826
+ if (i != 0) *(res_p++) = ',';
1827
+ res_p += sprintf(res_p, "%s", q_strs[i]);
1828
+ free(q_strs[i]);
1829
+ }
1830
+ free(q_strs);
1831
+
1832
+ *(res_p)++ = ']';
1833
+ *res_p = 0;
1834
+ return res;
1835
+ }
1836
+
1837
+ static void spanoq_extract_terms(Query *self, HashSet *terms)
1838
+ {
1839
+ SpanOrQuery *soq = SpOQ(self);
1840
+ int i;
1841
+ for (i = 0; i < soq->c_cnt; i++) {
1842
+ Query *clause = soq->clauses[i];
1843
+ clause->extract_terms(clause, terms);
1844
+ }
1845
+ }
1846
+
1847
+ static HashSet *spanoq_get_terms(Query *self)
1848
+ {
1849
+ SpanOrQuery *soq = SpOQ(self);
1850
+ HashSet *terms = hs_new_str(&free);
1851
+ int i;
1852
+ for (i = 0; i < soq->c_cnt; i++) {
1853
+ Query *clause = soq->clauses[i];
1854
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1855
+ hs_merge(terms, sub_terms);
1856
+ }
1857
+
1858
+ return terms;
1859
+ }
1860
+
1861
+ static SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1862
+ {
1863
+ SpanOrQuery *soq = SpOQ(self);
1864
+ if (soq->c_cnt == 1) {
1865
+ Query *q = soq->clauses[0];
1866
+ return SpQ(q)->get_spans(q, ir);
1867
+ }
1868
+
1869
+ return spanoe_new(self, ir);
1870
+ }
1871
+
1872
+ static Query *spanoq_rewrite(Query *self, IndexReader *ir)
1873
+ {
1874
+ SpanOrQuery *soq = SpOQ(self);
1875
+ int i;
1876
+
1877
+ /* replace clauses with their rewritten queries */
1878
+ for (i = 0; i < soq->c_cnt; i++) {
1879
+ Query *clause = soq->clauses[i];
1880
+ Query *rewritten = clause->rewrite(clause, ir);
1881
+ q_deref(clause);
1882
+ soq->clauses[i] = rewritten;
1883
+ }
1884
+
1885
+ self->ref_cnt++;
1886
+ return self;
1887
+ }
1888
+
1889
+ static void spanoq_destroy_i(Query *self)
1890
+ {
1891
+ SpanOrQuery *soq = SpOQ(self);
1892
+
1893
+ int i;
1894
+ for (i = 0; i < soq->c_cnt; i++) {
1895
+ Query *clause = soq->clauses[i];
1896
+ q_deref(clause);
1897
+ }
1898
+ free(soq->clauses);
1899
+
1900
+ spanq_destroy_i(self);
1901
+ }
1902
+
1903
+ static unsigned long spanoq_hash(Query *self)
1904
+ {
1905
+ int i;
1906
+ unsigned long hash = spanq_hash(self);
1907
+ SpanOrQuery *soq = SpOQ(self);
1908
+
1909
+ for (i = 0; i < soq->c_cnt; i++) {
1910
+ Query *q = soq->clauses[i];
1911
+ hash ^= q->hash(q);
1912
+ }
1913
+ return hash;
1914
+ }
1915
+
1916
+ static int spanoq_eq(Query *self, Query *o)
1917
+ {
1918
+ int i;
1919
+ Query *q1, *q2;
1920
+ SpanOrQuery *soq1 = SpOQ(self);
1921
+ SpanOrQuery *soq2 = SpOQ(o);
1922
+
1923
+ if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
1924
+ return false;
1925
+ }
1926
+ for (i = 0; i < soq1->c_cnt; i++) {
1927
+ q1 = soq1->clauses[i];
1928
+ q2 = soq2->clauses[i];
1929
+ if (!q1->eq(q1, q2)) {
1930
+ return false;
1931
+ }
1932
+ }
1933
+ return true;
1934
+ }
1935
+
1936
+ Query *spanoq_new()
1937
+ {
1938
+ Query *self = q_new(SpanOrQuery);
1939
+ SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1940
+ SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
1941
+
1942
+ SpQ(self)->field = NULL;
1943
+ SpQ(self)->get_spans = &spanoq_get_spans;
1944
+ SpQ(self)->get_terms = &spanoq_get_terms;
1945
+
1946
+ self->type = SPAN_OR_QUERY;
1947
+ self->rewrite = &spanoq_rewrite;
1948
+ self->extract_terms = &spanoq_extract_terms;
1949
+ self->to_s = &spanoq_to_s;
1950
+ self->hash = &spanoq_hash;
1951
+ self->eq = &spanoq_eq;
1952
+ self->destroy_i = &spanoq_destroy_i;
1953
+ self->create_weight_i = &spanw_new;
1954
+ self->get_matchv_i = &spanq_get_matchv_i;
1955
+
1956
+ return self;
1957
+ }
1958
+
1959
+ Query *spanoq_add_clause_nr(Query *self, Query *clause)
1960
+ {
1961
+ const int curr_index = SpOQ(self)->c_cnt++;
1962
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1963
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
1964
+ "SpanQuery.", q_get_query_name(clause->type));
1965
+ }
1966
+ if (curr_index == 0) {
1967
+ SpQ(self)->field = SpQ(clause)->field;
1968
+ }
1969
+ else if (SpQ(self)->field != SpQ(clause)->field) {
1970
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1971
+ "Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
1972
+ "with field \"%s\"", S(SpQ(clause)->field), S(SpQ(self)->field));
1973
+ }
1974
+ if (curr_index >= SpOQ(self)->c_capa) {
1975
+ SpOQ(self)->c_capa <<= 1;
1976
+ REALLOC_N(SpOQ(self)->clauses, Query *, SpOQ(self)->c_capa);
1977
+ }
1978
+ SpOQ(self)->clauses[curr_index] = clause;
1979
+ return clause;
1980
+ }
1981
+
1982
+ Query *spanoq_add_clause(Query *self, Query *clause)
1983
+ {
1984
+ REF(clause);
1985
+ return spanoq_add_clause_nr(self, clause);
1986
+ }
1987
+
1988
+ /*****************************************************************************
1989
+ *
1990
+ * SpanNearQuery
1991
+ *
1992
+ *****************************************************************************/
1993
+
1994
+ static char *spannq_to_s(Query *self, Symbol field)
1995
+ {
1996
+ int i;
1997
+ SpanNearQuery *snq = SpNQ(self);
1998
+ char *res, *res_p;
1999
+ char **q_strs = ALLOC_N(char *, snq->c_cnt);
2000
+ int len = 50;
2001
+ for (i = 0; i < snq->c_cnt; i++) {
2002
+ Query *clause = snq->clauses[i];
2003
+ q_strs[i] = clause->to_s(clause, field);
2004
+ len += strlen(q_strs[i]);
2005
+ }
2006
+
2007
+ res_p = res = ALLOC_N(char, len);
2008
+ res_p += sprintf(res_p, "span_near[");
2009
+ for (i = 0; i < snq->c_cnt; i++) {
2010
+ if (i != 0) *(res_p)++ = ',';
2011
+ res_p += sprintf(res_p, "%s", q_strs[i]);
2012
+ free(q_strs[i]);
2013
+ }
2014
+ free(q_strs);
2015
+
2016
+ *(res_p++) = ']';
2017
+ *res_p = 0;
2018
+ return res;
2019
+ }
2020
+
2021
+ static void spannq_extract_terms(Query *self, HashSet *terms)
2022
+ {
2023
+ SpanNearQuery *snq = SpNQ(self);
2024
+ int i;
2025
+ for (i = 0; i < snq->c_cnt; i++) {
2026
+ Query *clause = snq->clauses[i];
2027
+ clause->extract_terms(clause, terms);
2028
+ }
2029
+ }
2030
+
2031
+ static HashSet *spannq_get_terms(Query *self)
2032
+ {
2033
+ SpanNearQuery *snq = SpNQ(self);
2034
+ HashSet *terms = hs_new_str(&free);
2035
+ int i;
2036
+ for (i = 0; i < snq->c_cnt; i++) {
2037
+ Query *clause = snq->clauses[i];
2038
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
2039
+ hs_merge(terms, sub_terms);
2040
+ }
2041
+
2042
+ return terms;
2043
+ }
2044
+
2045
+ static SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
2046
+ {
2047
+ SpanNearQuery *snq = SpNQ(self);
2048
+
2049
+ if (snq->c_cnt == 1) {
2050
+ Query *q = snq->clauses[0];
2051
+ return SpQ(q)->get_spans(q, ir);
2052
+ }
2053
+
2054
+ return spanne_new(self, ir);
2055
+ }
2056
+
2057
+ static Query *spannq_rewrite(Query *self, IndexReader *ir)
2058
+ {
2059
+ SpanNearQuery *snq = SpNQ(self);
2060
+ int i;
2061
+ for (i = 0; i < snq->c_cnt; i++) {
2062
+ Query *clause = snq->clauses[i];
2063
+ Query *rewritten = clause->rewrite(clause, ir);
2064
+ q_deref(clause);
2065
+ snq->clauses[i] = rewritten;
2066
+ }
2067
+
2068
+ self->ref_cnt++;
2069
+ return self;
2070
+ }
2071
+
2072
+ static void spannq_destroy(Query *self)
2073
+ {
2074
+ SpanNearQuery *snq = SpNQ(self);
2075
+
2076
+ int i;
2077
+ for (i = 0; i < snq->c_cnt; i++) {
2078
+ Query *clause = snq->clauses[i];
2079
+ q_deref(clause);
2080
+ }
2081
+ free(snq->clauses);
2082
+
2083
+ spanq_destroy_i(self);
2084
+ }
2085
+
2086
+ static unsigned long spannq_hash(Query *self)
2087
+ {
2088
+ int i;
2089
+ unsigned long hash = spanq_hash(self);
2090
+ SpanNearQuery *snq = SpNQ(self);
2091
+
2092
+ for (i = 0; i < snq->c_cnt; i++) {
2093
+ Query *q = snq->clauses[i];
2094
+ hash ^= q->hash(q);
2095
+ }
2096
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
2097
+ }
2098
+
2099
+ static int spannq_eq(Query *self, Query *o)
2100
+ {
2101
+ int i;
2102
+ Query *q1, *q2;
2103
+ SpanNearQuery *snq1 = SpNQ(self);
2104
+ SpanNearQuery *snq2 = SpNQ(o);
2105
+ if (! spanq_eq(self, o)
2106
+ || (snq1->c_cnt != snq2->c_cnt)
2107
+ || (snq1->slop != snq2->slop)
2108
+ || (snq1->in_order != snq2->in_order)) {
2109
+ return false;
2110
+ }
2111
+
2112
+ for (i = 0; i < snq1->c_cnt; i++) {
2113
+ q1 = snq1->clauses[i];
2114
+ q2 = snq2->clauses[i];
2115
+ if (!q1->eq(q1, q2)) {
2116
+ return false;
2117
+ }
2118
+ }
2119
+
2120
+ return true;
2121
+ }
2122
+
2123
+ Query *spannq_new(int slop, bool in_order)
2124
+ {
2125
+ Query *self = q_new(SpanNearQuery);
2126
+
2127
+ SpNQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
2128
+ SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
2129
+ SpNQ(self)->slop = slop;
2130
+ SpNQ(self)->in_order = in_order;
2131
+
2132
+ SpQ(self)->get_spans = &spannq_get_spans;
2133
+ SpQ(self)->get_terms = &spannq_get_terms;
2134
+ SpQ(self)->field = NULL;
2135
+
2136
+ self->type = SPAN_NEAR_QUERY;
2137
+ self->rewrite = &spannq_rewrite;
2138
+ self->extract_terms = &spannq_extract_terms;
2139
+ self->to_s = &spannq_to_s;
2140
+ self->hash = &spannq_hash;
2141
+ self->eq = &spannq_eq;
2142
+ self->destroy_i = &spannq_destroy;
2143
+ self->create_weight_i = &spanw_new;
2144
+ self->get_matchv_i = &spanq_get_matchv_i;
2145
+
2146
+ return self;
2147
+ }
2148
+
2149
+ Query *spannq_add_clause_nr(Query *self, Query *clause)
2150
+ {
2151
+ const int curr_index = SpNQ(self)->c_cnt++;
2152
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
2153
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
2154
+ "SpanQuery.", q_get_query_name(clause->type));
2155
+ }
2156
+ if (curr_index == 0) {
2157
+ SpQ(self)->field = SpQ(clause)->field;
2158
+ }
2159
+ else if (SpQ(self)->field != SpQ(clause)->field) {
2160
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2161
+ "Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
2162
+ "with field \"%s\"", S(SpQ(clause)->field), S(SpQ(self)->field));
2163
+ }
2164
+ if (curr_index >= SpNQ(self)->c_capa) {
2165
+ SpNQ(self)->c_capa <<= 1;
2166
+ REALLOC_N(SpNQ(self)->clauses, Query *, SpNQ(self)->c_capa);
2167
+ }
2168
+ SpNQ(self)->clauses[curr_index] = clause;
2169
+ return clause;
2170
+ }
2171
+
2172
+ Query *spannq_add_clause(Query *self, Query *clause)
2173
+ {
2174
+ REF(clause);
2175
+ return spannq_add_clause_nr(self, clause);
2176
+ }
2177
+
2178
+ /*****************************************************************************
2179
+ *
2180
+ * SpanNotQuery
2181
+ *
2182
+ *****************************************************************************/
2183
+
2184
+ static char *spanxq_to_s(Query *self, Symbol field)
2185
+ {
2186
+ SpanNotQuery *sxq = SpXQ(self);
2187
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
2188
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
2189
+ char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
2190
+
2191
+ free(inc_s);
2192
+ free(exc_s);
2193
+ return res;
2194
+ }
2195
+
2196
+ static void spanxq_extract_terms(Query *self, HashSet *terms)
2197
+ {
2198
+ SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
2199
+ }
2200
+
2201
+ static HashSet *spanxq_get_terms(Query *self)
2202
+ {
2203
+ return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
2204
+ }
2205
+
2206
+ static Query *spanxq_rewrite(Query *self, IndexReader *ir)
2207
+ {
2208
+ SpanNotQuery *sxq = SpXQ(self);
2209
+ Query *q, *rq;
2210
+
2211
+ /* rewrite inclusive query */
2212
+ q = sxq->inc;
2213
+ rq = q->rewrite(q, ir);
2214
+ q_deref(q);
2215
+ sxq->inc = rq;
2216
+
2217
+ /* rewrite exclusive query */
2218
+ q = sxq->exc;
2219
+ rq = q->rewrite(q, ir);
2220
+ q_deref(q);
2221
+ sxq->exc = rq;
2222
+
2223
+ self->ref_cnt++;
2224
+ return self;
2225
+ }
2226
+
2227
+ static void spanxq_destroy(Query *self)
2228
+ {
2229
+ SpanNotQuery *sxq = SpXQ(self);
2230
+
2231
+ q_deref(sxq->inc);
2232
+ q_deref(sxq->exc);
2233
+
2234
+ spanq_destroy_i(self);
2235
+ }
2236
+
2237
+ static unsigned long spanxq_hash(Query *self)
2238
+ {
2239
+ SpanNotQuery *sxq = SpXQ(self);
2240
+ return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
2241
+ ^ sxq->exc->hash(sxq->exc);
2242
+ }
2243
+
2244
+ static int spanxq_eq(Query *self, Query *o)
2245
+ {
2246
+ SpanNotQuery *sxq1 = SpXQ(self);
2247
+ SpanNotQuery *sxq2 = SpXQ(o);
2248
+ return spanq_eq(self, o) && sxq1->inc->eq(sxq1->inc, sxq2->inc)
2249
+ && sxq1->exc->eq(sxq1->exc, sxq2->exc);
2250
+ }
2251
+
2252
+
2253
+ Query *spanxq_new_nr(Query *inc, Query *exc)
2254
+ {
2255
+ Query *self;
2256
+ if (SpQ(inc)->field != SpQ(inc)->field) {
2257
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2258
+ "Attempted to add a SpanQuery with field \"%s\" along with a "
2259
+ "SpanQuery with field \"%s\" to an SpanNotQuery",
2260
+ S(SpQ(inc)->field), S(SpQ(exc)->field));
2261
+ }
2262
+ self = q_new(SpanNotQuery);
2263
+
2264
+ SpXQ(self)->inc = inc;
2265
+ SpXQ(self)->exc = exc;
2266
+
2267
+ SpQ(self)->field = SpQ(inc)->field;
2268
+ SpQ(self)->get_spans = &spanxe_new;
2269
+ SpQ(self)->get_terms = &spanxq_get_terms;
2270
+
2271
+ self->type = SPAN_NOT_QUERY;
2272
+ self->rewrite = &spanxq_rewrite;
2273
+ self->extract_terms = &spanxq_extract_terms;
2274
+ self->to_s = &spanxq_to_s;
2275
+ self->hash = &spanxq_hash;
2276
+ self->eq = &spanxq_eq;
2277
+ self->destroy_i = &spanxq_destroy;
2278
+ self->create_weight_i = &spanw_new;
2279
+ self->get_matchv_i = &spanq_get_matchv_i;
2280
+
2281
+ return self;
2282
+ }
2283
+
2284
+ Query *spanxq_new(Query *inc, Query *exc)
2285
+ {
2286
+ REF(inc);
2287
+ REF(exc);
2288
+ return spanxq_new_nr(inc, exc);
2289
+ }
2290
+
2291
+
2292
+ /*****************************************************************************
2293
+ *
2294
+ * Rewritables
2295
+ *
2296
+ *****************************************************************************/
2297
+
2298
+ /*****************************************************************************
2299
+ *
2300
+ * SpanPrefixQuery
2301
+ *
2302
+ *****************************************************************************/
2303
+
2304
/* View a query pointer as a pointer to SpanPrefixQuery. */
#define SpPfxQ(q) ((SpanPrefixQuery *)(q))
2305
+
2306
+ static char *spanprq_to_s(Query *self, Symbol default_field)
2307
+ {
2308
+ char *buffer, *bptr;
2309
+ const char *prefix = SpPfxQ(self)->prefix;
2310
+ Symbol field = SpQ(self)->field;
2311
+ size_t plen = strlen(prefix);
2312
+ size_t flen = sym_len(field);
2313
+
2314
+ bptr = buffer = ALLOC_N(char, plen + flen + 35);
2315
+
2316
+ if (default_field == NULL || (field != default_field)) {
2317
+ bptr += sprintf(bptr, "%s:", S(field));
2318
+ }
2319
+
2320
+ bptr += sprintf(bptr, "%s*", prefix);
2321
+ if (self->boost != 1.0) {
2322
+ *bptr = '^';
2323
+ dbl_to_s(++bptr, self->boost);
2324
+ }
2325
+
2326
+ return buffer;
2327
+ }
2328
+
2329
+ static Query *spanprq_rewrite(Query *self, IndexReader *ir)
2330
+ {
2331
+ const int field_num = fis_get_field_num(ir->fis, SpQ(self)->field);
2332
+ Query *volatile q = spanmtq_new_conf(SpQ(self)->field, SpPfxQ(self)->max_terms);
2333
+ q->boost = self->boost; /* set the boost */
2334
+
2335
+ if (field_num >= 0) {
2336
+ const char *prefix = SpPfxQ(self)->prefix;
2337
+ TermEnum *te = ir->terms_from(ir, field_num, prefix);
2338
+ const char *term = te->curr_term;
2339
+ size_t prefix_len = strlen(prefix);
2340
+
2341
+ TRY
2342
+ do {
2343
+ if (strncmp(term, prefix, prefix_len) != 0) {
2344
+ break;
2345
+ }
2346
+ spanmtq_add_term(q, term); /* found a match */
2347
+ } while (te->next(te));
2348
+ XFINALLY
2349
+ te->close(te);
2350
+ XENDTRY
2351
+ }
2352
+
2353
+ return q;
2354
+ }
2355
+
2356
+ static void spanprq_destroy(Query *self)
2357
+ {
2358
+ free(SpPfxQ(self)->prefix);
2359
+ spanq_destroy_i(self);
2360
+ }
2361
+
2362
+ static unsigned long spanprq_hash(Query *self)
2363
+ {
2364
+ return sym_hash(SpQ(self)->field) ^ str_hash(SpPfxQ(self)->prefix);
2365
+ }
2366
+
2367
+ static int spanprq_eq(Query *self, Query *o)
2368
+ {
2369
+ return (strcmp(SpPfxQ(self)->prefix, SpPfxQ(o)->prefix) == 0)
2370
+ && (SpQ(self)->field == SpQ(o)->field);
2371
+ }
2372
+
2373
+ Query *spanprq_new(Symbol field, const char *prefix)
2374
+ {
2375
+ Query *self = q_new(SpanPrefixQuery);
2376
+
2377
+ SpQ(self)->field = field;
2378
+ SpPfxQ(self)->prefix = estrdup(prefix);
2379
+ SpPfxQ(self)->max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
2380
+
2381
+ self->type = SPAN_PREFIX_QUERY;
2382
+ self->rewrite = &spanprq_rewrite;
2383
+ self->to_s = &spanprq_to_s;
2384
+ self->hash = &spanprq_hash;
2385
+ self->eq = &spanprq_eq;
2386
+ self->destroy_i = &spanprq_destroy;
2387
+ self->create_weight_i = &q_create_weight_unsup;
2388
+
2389
+ return self;
2390
+ }