sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,100 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /****************************************************************************
5
+ *
6
+ * PrefixQuery
7
+ *
8
+ ****************************************************************************/
9
+
10
+ #define PfxQ(query) ((PrefixQuery *)(query))
11
+
12
+ static char *prq_to_s(Query *self, const char *current_field)
13
+ {
14
+ char *buffer, *bptr;
15
+ const char *prefix = PfxQ(self)->prefix;
16
+ const char *field = PfxQ(self)->field;
17
+ size_t plen = strlen(prefix);
18
+ size_t flen = strlen(field);
19
+
20
+ bptr = buffer = ALLOC_N(char, plen + flen + 35);
21
+
22
+ if (strcmp(field, current_field) != 0) {
23
+ sprintf(bptr, "%s:", field);
24
+ bptr += flen + 1;
25
+ }
26
+
27
+ sprintf(bptr, "%s*", prefix);
28
+ bptr += plen + 1;
29
+ if (self->boost != 1.0) {
30
+ *bptr = '^';
31
+ dbl_to_s(++bptr, self->boost);
32
+ }
33
+
34
+ return buffer;
35
+ }
36
+
37
+ static Query *prq_rewrite(Query *self, IndexReader *ir)
38
+ {
39
+ const char *field = PfxQ(self)->field;
40
+ const int field_num = fis_get_field_num(ir->fis, field);
41
+ Query *volatile q = multi_tq_new_conf(field, MTQMaxTerms(self), 0.0);
42
+ q->boost = self->boost; /* set the boost */
43
+
44
+ if (field_num >= 0) {
45
+ const char *prefix = PfxQ(self)->prefix;
46
+ TermEnum *te = ir->terms_from(ir, field_num, prefix);
47
+ const char *term = te->curr_term;
48
+ size_t prefix_len = strlen(prefix);
49
+
50
+ TRY
51
+ do {
52
+ if (strncmp(term, prefix, prefix_len) != 0) {
53
+ break;
54
+ }
55
+ multi_tq_add_term(q, term); /* found a match */
56
+ } while (te->next(te));
57
+ XFINALLY
58
+ te->close(te);
59
+ XENDTRY
60
+ }
61
+
62
+ return q;
63
+ }
64
+
65
+ static void prq_destroy(Query *self)
66
+ {
67
+ free(PfxQ(self)->field);
68
+ free(PfxQ(self)->prefix);
69
+ q_destroy_i(self);
70
+ }
71
+
72
+ static unsigned long prq_hash(Query *self)
73
+ {
74
+ return str_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
75
+ }
76
+
77
+ static int prq_eq(Query *self, Query *o)
78
+ {
79
+ return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
80
+ && (strcmp(PfxQ(self)->field, PfxQ(o)->field) == 0);
81
+ }
82
+
83
+ Query *prefixq_new(const char *field, const char *prefix)
84
+ {
85
+ Query *self = q_new(PrefixQuery);
86
+
87
+ PfxQ(self)->field = estrdup(field);
88
+ PfxQ(self)->prefix = estrdup(prefix);
89
+ MTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
90
+
91
+ self->type = PREFIX_QUERY;
92
+ self->rewrite = &prq_rewrite;
93
+ self->to_s = &prq_to_s;
94
+ self->hash = &prq_hash;
95
+ self->eq = &prq_eq;
96
+ self->destroy_i = &prq_destroy;
97
+ self->create_weight_i = &q_create_weight_unsup;
98
+
99
+ return self;
100
+ }
@@ -0,0 +1,350 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /*****************************************************************************
5
+ *
6
+ * Range
7
+ *
8
+ *****************************************************************************/
9
+
10
/* A term range on one field; shared by RangeFilter and RangeQuery. */
+ typedef struct Range
11
+ {
12
+ char *field; /* field name; copied in range_new, freed in range_destroy */
13
+ char *lower_term; /* lower bound, or NULL when the range has no lower bound */
14
+ char *upper_term; /* upper bound, or NULL when the range has no upper bound */
15
+ bool include_lower : 1; /* true if lower_term itself is part of the range */
16
+ bool include_upper : 1; /* true if upper_term itself is part of the range */
17
+ } Range;
18
+
19
+ static char *range_to_s(Range *range, const char *field, float boost)
20
+ {
21
+ char *buffer, *b;
22
+ size_t flen, llen, ulen;
23
+
24
+ flen = strlen(range->field);
25
+ llen = range->lower_term ? strlen(range->lower_term) : 0;
26
+ ulen = range->upper_term ? strlen(range->upper_term) : 0;
27
+ buffer = ALLOC_N(char, flen + llen + ulen + 40);
28
+ b = buffer;
29
+
30
+ if (strcmp(field, range->field)) {
31
+ memcpy(buffer, range->field, flen * sizeof(char));
32
+ b += flen;
33
+ *b = ':';
34
+ b++;
35
+ }
36
+
37
+ if (range->lower_term) {
38
+ *b = range->include_lower ? '[' : '{';
39
+ b++;
40
+ memcpy(b, range->lower_term, llen);
41
+ b += llen;
42
+ } else {
43
+ *b = '<';
44
+ b++;
45
+ }
46
+
47
+ if (range->upper_term && range->lower_term) {
48
+ *b = ' '; b++;
49
+ }
50
+
51
+ if (range->upper_term) {
52
+ memcpy(b, range->upper_term, ulen);
53
+ b += ulen;
54
+ *b = range->include_upper ? ']' : '}';
55
+ b++;
56
+ } else {
57
+ *b = '>';
58
+ b++;
59
+ }
60
+
61
+ *b = 0;
62
+ if (boost != 1.0) {
63
+ *b = '^';
64
+ dbl_to_s(b + 1, boost);
65
+ }
66
+ return buffer;
67
+ }
68
+
69
+ static void range_destroy(Range *range)
70
+ {
71
+ free(range->field);
72
+ free(range->lower_term);
73
+ free(range->upper_term);
74
+ free(range);
75
+ }
76
+
77
+ static unsigned long range_hash(Range *filt)
78
+ {
79
+ return filt->include_lower | (filt->include_upper << 1)
80
+ | ((str_hash(filt->field)
81
+ ^ (filt->lower_term ? str_hash(filt->lower_term) : 0)
82
+ ^ (filt->upper_term ? str_hash(filt->upper_term) : 0)) << 2);
83
+ }
84
+
85
/*
 * NULL-tolerant string equality.
 * Returns 1 when both pointers are identical (including both NULL) or
 * when both are non-NULL and the strings compare equal; 0 otherwise.
 */
static int str_eq(char *s1, char *s2)
{
    if (s1 == s2) {
        return 1;
    }
    if (!s1 || !s2) {
        return 0; /* exactly one side is NULL */
    }
    return strcmp(s1, s2) == 0;
}
89
+
90
+ static int range_eq(Range *filt, Range *o)
91
+ {
92
+ return (str_eq(filt->field, o->field)
93
+ && str_eq(filt->lower_term, o->lower_term)
94
+ && str_eq(filt->upper_term, o->upper_term)
95
+ && (filt->include_lower == o->include_lower)
96
+ && (filt->include_upper == o->include_upper));
97
+ }
98
+
99
+ Range *range_new(const char *field, const char *lower_term,
100
+ const char *upper_term, bool include_lower,
101
+ bool include_upper)
102
+ {
103
+ Range *range;
104
+
105
+ if (!lower_term && !upper_term) {
106
+ RAISE(ARG_ERROR, "Nil bounds for range. A range must include either "
107
+ "lower bound or an upper bound");
108
+ }
109
+ if (include_lower && !lower_term) {
110
+ RAISE(ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
111
+ "is, if you specify :include_lower => true when you create a "
112
+ "range you must include a :lower_term");
113
+ }
114
+ if (include_upper && !upper_term) {
115
+ RAISE(ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
116
+ "is, if you specify :include_upper => true when you create a "
117
+ "range you must include a :upper_term");
118
+ }
119
+ if (upper_term && lower_term && (strcmp(upper_term, lower_term) < 0)) {
120
+ RAISE(ARG_ERROR, "Upper bound must be greater than lower bound. "
121
+ "\"%s\" < \"%s\"", upper_term, lower_term);
122
+ }
123
+
124
+ range = ALLOC(Range);
125
+
126
+ range->field = estrdup((char *)field);
127
+ range->lower_term = lower_term ? estrdup(lower_term) : NULL;
128
+ range->upper_term = upper_term ? estrdup(upper_term) : NULL;
129
+ range->include_lower = include_lower;
130
+ range->include_upper = include_upper;
131
+ return range;
132
+ }
133
+
134
+ /***************************************************************************
135
+ *
136
+ * RangeFilter
137
+ *
138
+ ***************************************************************************/
139
+
140
/* Filter that selects documents having a term inside a Range. */
+ typedef struct RangeFilter
141
+ {
142
+ Filter super; /* base Filter; first member so RF() can cast a Filter pointer */
143
+ Range *range; /* created in rfilt_new, destroyed in rfilt_destroy_i */
144
+ } RangeFilter;
145
+
146
+ #define RF(filt) ((RangeFilter *)(filt))
147
+
148
+ static void rfilt_destroy_i(Filter *filt)
149
+ {
150
+ range_destroy(RF(filt)->range);
151
+ filt_destroy_i(filt);
152
+ }
153
+
154
+ static char *rfilt_to_s(Filter *filt)
155
+ {
156
+ char *rstr = range_to_s(RF(filt)->range, "", 1.0);
157
+ char *rfstr = strfmt("RangeFilter< %s >", rstr);
158
+ free(rstr);
159
+ return rfstr;
160
+ }
161
+
162
+ static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
163
+ {
164
+ BitVector *bv = bv_new_capa(ir->max_doc(ir));
165
+ Range *range = RF(filt)->range;
166
+ FieldInfo *fi = fis_get_field(ir->fis, range->field);
167
+ /* the field info exists we need to add docs to the bit vector, otherwise
168
+ * we just return an empty bit vector */
169
+ if (fi) {
170
+ const char *lower_term =
171
+ range->lower_term ? range->lower_term : EMPTY_STRING;
172
+ const char *upper_term = range->upper_term;
173
+ const bool include_upper = range->include_upper;
174
+ const int field_num = fi->number;
175
+ char *term;
176
+ TermEnum* te;
177
+ TermDocEnum *tde;
178
+ bool check_lower;
179
+
180
+ te = ir->terms(ir, field_num);
181
+ if (te->skip_to(te, lower_term) == NULL) {
182
+ te->close(te);
183
+ return bv;
184
+ }
185
+
186
+ check_lower = !(range->include_lower || (lower_term == EMPTY_STRING));
187
+
188
+ tde = ir->term_docs(ir);
189
+ term = te->curr_term;
190
+ do {
191
+ if (!check_lower
192
+ || (strcmp(term, lower_term) > 0)) {
193
+ check_lower = false;
194
+ if (upper_term) {
195
+ int compare = strcmp(upper_term, term);
196
+ /* Break if upper term is greater than or equal to upper
197
+ * term and include_upper is false or ther term is fully
198
+ * greater than upper term. This is optimized so that only
199
+ * one check is done except in last check or two */
200
+ if ((compare <= 0)
201
+ && (!include_upper || (compare < 0))) {
202
+ break;
203
+ }
204
+ }
205
+ /* we have a good term, find the docs */
206
+ /* text is already pointing to term buffer text */
207
+ tde->seek_te(tde, te);
208
+ while (tde->next(tde)) {
209
+ bv_set(bv, tde->doc_num(tde));
210
+ /* printf("Setting %d\n", tde->doc_num(tde)); */
211
+ }
212
+ }
213
+ } while (te->next(te));
214
+
215
+ tde->close(tde);
216
+ te->close(te);
217
+ }
218
+
219
+ return bv;
220
+ }
221
+
222
+ static unsigned long rfilt_hash(Filter *filt)
223
+ {
224
+ return range_hash(RF(filt)->range);
225
+ }
226
+
227
+ static int rfilt_eq(Filter *filt, Filter *o)
228
+ {
229
+ return range_eq(RF(filt)->range, RF(o)->range);
230
+ }
231
+
232
+ Filter *rfilt_new(const char *field,
233
+ const char *lower_term, const char *upper_term,
234
+ bool include_lower, bool include_upper)
235
+ {
236
+ Filter *filt = filt_new(RangeFilter);
237
+ RF(filt)->range = range_new(field, lower_term, upper_term,
238
+ include_lower, include_upper);
239
+
240
+ filt->get_bv_i = &rfilt_get_bv_i;
241
+ filt->hash = &rfilt_hash;
242
+ filt->eq = &rfilt_eq;
243
+ filt->to_s = &rfilt_to_s;
244
+ filt->destroy_i = &rfilt_destroy_i;
245
+ return filt;
246
+ }
247
+
248
+ /*****************************************************************************
249
+ *
250
+ * RangeQuery
251
+ *
252
+ *****************************************************************************/
253
+
254
+ #define RQ(query) ((RangeQuery *)(query))
255
/* Query matching documents that have a term inside a Range. */
+ typedef struct RangeQuery
256
+ {
257
+ Query f; /* base Query; first member so RQ() can cast a Query pointer */
258
+ Range *range; /* created in rq_new, destroyed in rq_destroy */
259
+ } RangeQuery;
260
+
261
+ static char *rq_to_s(Query *self, const char *field)
262
+ {
263
+ return range_to_s(RQ(self)->range, field, self->boost);
264
+ }
265
+
266
+ static void rq_destroy(Query *self)
267
+ {
268
+ range_destroy(RQ(self)->range);
269
+ q_destroy_i(self);
270
+ }
271
+
272
+ static MatchVector *rq_get_matchv_i(Query *self, MatchVector *mv,
273
+ TermVector *tv)
274
+ {
275
+ Range *range = RQ(((ConstantScoreQuery *)self)->original)->range;
276
+ if (strcmp(tv->field, range->field) == 0) {
277
+ int i, j;
278
+ char *upper_text = range->upper_term;
279
+ char *lower_text = range->lower_term;
280
+ int upper_limit = range->include_upper ? 1 : 0;
281
+ int lower_limit = range->include_lower ? 1 : 0;
282
+
283
+ for (i = tv->term_cnt - 1; i >= 0; i--) {
284
+ TVTerm *tv_term = &(tv->terms[i]);
285
+ char *text = tv_term->text;
286
+ if ((!upper_text || strcmp(text, upper_text) < upper_limit) &&
287
+ (!lower_text || strcmp(lower_text, text) < lower_limit)) {
288
+
289
+ for (j = 0; j < tv_term->freq; j++) {
290
+ int pos = tv_term->positions[j];
291
+ matchv_add(mv, pos, pos);
292
+ }
293
+ }
294
+ }
295
+ }
296
+ return mv;
297
+ }
298
+
299
+ static Query *rq_rewrite(Query *self, IndexReader *ir)
300
+ {
301
+ Query *csq;
302
+ Range *r = RQ(self)->range;
303
+ Filter *filter = rfilt_new(r->field, r->lower_term, r->upper_term,
304
+ r->include_lower, r->include_upper);
305
+ (void)ir;
306
+ csq = csq_new_nr(filter);
307
+ ((ConstantScoreQuery *)csq)->original = self;
308
+ csq->get_matchv_i = &rq_get_matchv_i;
309
+ return (Query *)csq;
310
+ }
311
+
312
+ static unsigned long rq_hash(Query *self)
313
+ {
314
+ return range_hash(RQ(self)->range);
315
+ }
316
+
317
+ static int rq_eq(Query *self, Query *o)
318
+ {
319
+ return range_eq(RQ(self)->range, RQ(o)->range);
320
+ }
321
+
322
+ Query *rq_new_less(const char *field, const char *upper_term,
323
+ bool include_upper)
324
+ {
325
+ return rq_new(field, NULL, upper_term, false, include_upper);
326
+ }
327
+
328
+ Query *rq_new_more(const char *field, const char *lower_term,
329
+ bool include_lower)
330
+ {
331
+ return rq_new(field, lower_term, NULL, include_lower, false);
332
+ }
333
+
334
+ Query *rq_new(const char *field, const char *lower_term,
335
+ const char *upper_term, bool include_lower, bool include_upper)
336
+ {
337
+ Query *self = q_new(RangeQuery);
338
+
339
+ RQ(self)->range = range_new(field, lower_term, upper_term,
340
+ include_lower, include_upper);
341
+
342
+ self->type = RANGE_QUERY;
343
+ self->rewrite = &rq_rewrite;
344
+ self->to_s = &rq_to_s;
345
+ self->hash = &rq_hash;
346
+ self->eq = &rq_eq;
347
+ self->destroy_i = &rq_destroy;
348
+ self->create_weight_i = &q_create_weight_unsup;
349
+ return self;
350
+ }
@@ -0,0 +1,2402 @@
1
+ #include <string.h>
2
+ #include <limits.h>
3
+ #include "search.h"
4
+ #include "hashset.h"
5
+
6
+ #define CLAUSE_INIT_CAPA 4
7
+
8
+ /*****************************************************************************
9
+ *
10
+ * SpanQuery
11
+ *
12
+ *****************************************************************************/
13
+
14
+ /***************************************************************************
15
+ * SpanQuery
16
+ ***************************************************************************/
17
+
18
+ #define SpQ(query) ((SpanQuery *)(query))
19
+
20
+ static unsigned long spanq_hash(Query *self)
21
+ {
22
+ return str_hash(SpQ(self)->field);
23
+ }
24
+
25
+ static int spanq_eq(Query *self, Query *o)
26
+ {
27
+ return strcmp(SpQ(self)->field, SpQ(o)->field) == 0;
28
+ }
29
+
30
+ static void spanq_destroy_i(Query *self)
31
+ {
32
+ q_destroy_i(self);
33
+ }
34
+
35
+ static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
36
+ HashSet *terms, TermVector *tv)
37
+ {
38
+ int i;
39
+ for (i = 0; i < terms->size; i++) {
40
+ char *term = (char *)terms->elems[i];
41
+ TVTerm *tv_term = tv_get_tv_term(tv, term);
42
+ if (tv_term) {
43
+ int j;
44
+ int m_idx = 0;
45
+ for (j = 0; j < tv_term->freq; j++) {
46
+ int pos = tv_term->positions[j];
47
+ for (; m_idx < full_mv->size; m_idx++) {
48
+ if (pos <= full_mv->matches[m_idx].end) {
49
+ if (pos >= full_mv->matches[m_idx].start) {
50
+ matchv_add(term_mv, pos, pos);
51
+ }
52
+ break;
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
58
+
59
+ return term_mv;
60
+ }
61
+
62
+ /***************************************************************************
63
+ * TVTermDocEnum
64
+ * dummy TermDocEnum used by the highlighter to find matches
65
+ ***************************************************************************/
66
+
67
+ #define TV_TDE(tde) ((TVTermDocEnum *)(tde))
68
+
69
/* TermDocEnum over a single TermVector, i.e. over one pseudo-document 0. */
+ typedef struct TVTermDocEnum
70
+ {
71
+ TermDocEnum super; /* base enum; first member so TV_TDE() can cast */
72
+ int doc; /* -1 after a successful seek, 0 on the document, INT_MAX when exhausted or term not found */
73
+ int index; /* next slot of positions[] to hand out */
74
+ int freq; /* freq of the sought term, copied from its TVTerm */
75
+ int *positions; /* points at the sought TVTerm's positions; not owned */
76
+ TermVector *tv; /* term vector being read; taken from ir->store */
77
+ } TVTermDocEnum;
78
+
79
+ static void tv_tde_seek(TermDocEnum *tde, int field_num, const char *term)
80
+ {
81
+ TVTermDocEnum *tv_tde = TV_TDE(tde);
82
+ TVTerm *tv_term = tv_get_tv_term(tv_tde->tv, term);
83
+ (void)field_num;
84
+ if (tv_term) {
85
+ tv_tde->doc = -1;
86
+ tv_tde->index = 0;
87
+ tv_tde->freq = tv_term->freq;
88
+ tv_tde->positions = tv_term->positions;
89
+ }
90
+ else {
91
+ tv_tde->doc = INT_MAX;
92
+ }
93
+ }
94
+
95
+ static bool tv_tde_next(TermDocEnum *tde)
96
+ {
97
+ if (TV_TDE(tde)->doc == -1) {
98
+ TV_TDE(tde)->doc = 0;
99
+ return true;
100
+ }
101
+ else {
102
+ TV_TDE(tde)->doc = INT_MAX;
103
+ return false;
104
+ }
105
+ }
106
+
107
+ static bool tv_tde_skip_to(TermDocEnum *tde, int doc_num)
108
+ {
109
+ if (doc_num == 0) {
110
+ TV_TDE(tde)->doc = 0;
111
+ return true;
112
+ }
113
+ else {
114
+ TV_TDE(tde)->doc = INT_MAX;
115
+ return false;
116
+ }
117
+ }
118
+
119
+ static int tv_tde_next_position(TermDocEnum *tde)
120
+ {
121
+ return TV_TDE(tde)->positions[TV_TDE(tde)->index++];
122
+ }
123
+
124
+ static int tv_tde_freq(TermDocEnum *tde)
125
+ {
126
+ return TV_TDE(tde)->freq;
127
+ }
128
+
129
+ static int tv_tde_doc_num(TermDocEnum *tde)
130
+ {
131
+ return TV_TDE(tde)->doc;
132
+ }
133
+
134
+ static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
135
+ {
136
+ TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
137
+ TermDocEnum *tde = (TermDocEnum *)tv_tde;
138
+ tv_tde->tv = (TermVector *)ir->store;
139
+ tde->seek = &tv_tde_seek;
140
+ tde->doc_num = &tv_tde_doc_num;
141
+ tde->freq = &tv_tde_freq;
142
+ tde->next = &tv_tde_next;
143
+ tde->skip_to = &tv_tde_skip_to;
144
+ tde->next_position = &tv_tde_next_position;
145
+ tde->close = (void (*)(TermDocEnum *tde))&free;
146
+
147
+ return tde;
148
+ }
149
+
150
+ static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
151
+ TermVector *tv)
152
+ {
153
+ if (strcmp(SpQ(self)->field, tv->field) == 0) {
154
+ SpanEnum *sp_enum;
155
+ IndexReader *ir = ALLOC(IndexReader);
156
+ MatchVector *full_mv = matchv_new();
157
+ HashSet *terms = SpQ(self)->get_terms(self);
158
+ ir->fis = fis_new(0, 0, 0);
159
+ fis_add_field(ir->fis, fi_new(tv->field, 0, 0, 0));
160
+ ir->store = (Store *)tv;
161
+ ir->term_positions = &spanq_ir_term_positions;
162
+ sp_enum = SpQ(self)->get_spans(self, ir);
163
+ while (sp_enum->next(sp_enum)) {
164
+ matchv_add(full_mv,
165
+ sp_enum->start(sp_enum),
166
+ sp_enum->end(sp_enum) - 1);
167
+ }
168
+ sp_enum->destroy(sp_enum);
169
+
170
+ fis_deref(ir->fis);
171
+ free(ir);
172
+
173
+ matchv_compact(full_mv);
174
+ mv_to_term_mv(mv, full_mv, terms, tv);
175
+ matchv_destroy(full_mv);
176
+ hs_destroy(terms);
177
+ }
178
+ return mv;
179
+ }
180
+
181
+ /***************************************************************************
182
+ *
183
+ * SpanScorer
184
+ *
185
+ ***************************************************************************/
186
+
187
+ #define SpSc(scorer) ((SpanScorer *)(scorer))
188
/* Scorer that scores documents from the spans of a SpanQuery. */
+ typedef struct SpanScorer
189
+ {
190
+ Scorer super; /* base Scorer; first member so SpSc() can cast */
191
+ IndexReader *ir; /* NOTE(review): not assigned by spansc_new in the visible code */
192
+ SpanEnum *spans; /* span enumeration; destroyed in spansc_destroy */
193
+ Similarity *sim; /* weight->similarity; used for tf and sloppy freq */
194
+ uchar *norms; /* field norms from ir->get_norms, indexed by doc number */
195
+ Weight *weight; /* the weight this scorer was created for */
196
+ float value; /* copy of weight->value, multiplied into the score */
197
+ float freq; /* accumulated sloppy frequency for the current doc */
198
+ bool first_time : 1; /* true until the first call to spansc_next */
199
+ bool more : 1; /* result of the latest spans next/skip_to call */
200
+ } SpanScorer;
201
+
202
+ static float spansc_score(Scorer *self)
203
+ {
204
+ SpanScorer *spansc = SpSc(self);
205
+ float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
206
+
207
+ /* normalize */
208
+ return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
209
+ }
210
+
211
+ static bool spansc_next(Scorer *self)
212
+ {
213
+ SpanScorer *spansc = SpSc(self);
214
+ SpanEnum *se = spansc->spans;
215
+ int match_length;
216
+
217
+ if (spansc->first_time) {
218
+ spansc->more = se->next(se);
219
+ spansc->first_time = false;
220
+ }
221
+
222
+ if (!spansc->more) {
223
+ return false;
224
+ }
225
+
226
+ spansc->freq = 0.0;
227
+ self->doc = se->doc(se);
228
+
229
+ while (spansc->more && (self->doc == se->doc(se))) {
230
+ match_length = se->end(se) - se->start(se);
231
+ spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
232
+ spansc->more = se->next(se);
233
+ }
234
+
235
+ return (spansc->more || (spansc->freq != 0.0));
236
+ }
237
+
238
+ static bool spansc_skip_to(Scorer *self, int target)
239
+ {
240
+ SpanScorer *spansc = SpSc(self);
241
+ SpanEnum *se = spansc->spans;
242
+
243
+ spansc->more = se->skip_to(se, target);
244
+
245
+ if (!spansc->more) {
246
+ return false;
247
+ }
248
+
249
+ spansc->freq = 0.0;
250
+ self->doc = se->doc(se);
251
+
252
+ while (spansc->more && (se->doc(se) == target)) {
253
+ spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
254
+ spansc->more = se->next(se);
255
+ }
256
+
257
+ return (spansc->more || (spansc->freq != 0.0));
258
+ }
259
+
260
+ static Explanation *spansc_explain(Scorer *self, int target)
261
+ {
262
+ Explanation *tf_explanation;
263
+ SpanScorer *spansc = SpSc(self);
264
+ float phrase_freq;
265
+ self->skip_to(self, target);
266
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
267
+
268
+ tf_explanation = expl_new(sim_tf(self->similarity, phrase_freq),
269
+ "tf(phrase_freq(%f)", phrase_freq);
270
+
271
+ return tf_explanation;
272
+ }
273
+
274
+ static void spansc_destroy(Scorer *self)
275
+ {
276
+ SpanScorer *spansc = SpSc(self);
277
+ if (spansc->spans) {
278
+ spansc->spans->destroy(spansc->spans);
279
+ }
280
+ scorer_destroy_i(self);
281
+ }
282
+
283
+ Scorer *spansc_new(Weight *weight, IndexReader *ir)
284
+ {
285
+ Scorer *self = NULL;
286
+ const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
287
+ if (field_num >= 0) {
288
+ Query *spanq = weight->query;
289
+ self = scorer_new(SpanScorer, weight->similarity);
290
+
291
+ SpSc(self)->first_time = true;
292
+ SpSc(self)->more = true;
293
+ SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
294
+ SpSc(self)->sim = weight->similarity;
295
+ SpSc(self)->norms = ir->get_norms(ir, field_num);
296
+ SpSc(self)->weight = weight;
297
+ SpSc(self)->value = weight->value;
298
+ SpSc(self)->freq = 0.0;
299
+
300
+ self->score = &spansc_score;
301
+ self->next = &spansc_next;
302
+ self->skip_to = &spansc_skip_to;
303
+ self->explain = &spansc_explain;
304
+ self->destroy = &spansc_destroy;
305
+ }
306
+ return self;
307
+ }
308
+
309
+ /*****************************************************************************
310
+ * SpanTermEnum
311
+ *****************************************************************************/
312
+
313
+ #define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
314
+ #define SpTQ(query) ((SpanTermQuery *)(query))
315
+
316
/* SpanEnum over the occurrences of a single term. */
+ typedef struct SpanTermEnum
317
+ {
318
+ SpanEnum super; /* base SpanEnum; first member so SpTEn() can cast */
319
+ TermDocEnum *positions; /* term positions enum; closed in spante_destroy */
320
+ int position; /* position of the current span; -1 initially */
321
+ int doc; /* current document; -1 initially, INT_MAX when exhausted */
322
+ int count; /* positions already consumed in the current document */
323
+ int freq; /* number of positions in the current document */
324
+ } SpanTermEnum;
325
+
326
+
327
+ static bool spante_next(SpanEnum *self)
328
+ {
329
+ SpanTermEnum *ste = SpTEn(self);
330
+ TermDocEnum *tde = ste->positions;
331
+
332
+ if (ste->count == ste->freq) {
333
+ if (! tde->next(tde)) {
334
+ ste->doc = INT_MAX;
335
+ return false;
336
+ }
337
+ ste->doc = tde->doc_num(tde);
338
+ ste->freq = tde->freq(tde);
339
+ ste->count = 0;
340
+ }
341
+ ste->position = tde->next_position(tde);
342
+ ste->count++;
343
+ return true;
344
+ }
345
+
346
+ static bool spante_skip_to(SpanEnum *self, int target)
347
+ {
348
+ SpanTermEnum *ste = SpTEn(self);
349
+ TermDocEnum *tde = ste->positions;
350
+
351
+ /* are we already at the correct position? */
352
+ if (ste->doc >= target) {
353
+ return true;
354
+ }
355
+
356
+ if (! tde->skip_to(tde, target)) {
357
+ ste->doc = INT_MAX;
358
+ return false;
359
+ }
360
+
361
+ ste->doc = tde->doc_num(tde);
362
+ ste->freq = tde->freq(tde);
363
+ ste->count = 0;
364
+
365
+ ste->position = tde->next_position(tde);
366
+ ste->count++;
367
+ return true;
368
+ }
369
+
370
+ static int spante_doc(SpanEnum *self)
371
+ {
372
+ return SpTEn(self)->doc;
373
+ }
374
+
375
+ static int spante_start(SpanEnum *self)
376
+ {
377
+ return SpTEn(self)->position;
378
+ }
379
+
380
+ static int spante_end(SpanEnum *self)
381
+ {
382
+ return SpTEn(self)->position + 1;
383
+ }
384
+
385
+ static char *spante_to_s(SpanEnum *self)
386
+ {
387
+ char *field = SpQ(self->query)->field;
388
+ char *query_str = self->query->to_s(self->query, field);
389
+ char pos_str[20];
390
+ size_t len = strlen(query_str);
391
+ int pos;
392
+ char *str = ALLOC_N(char, len + 40);
393
+
394
+ if (self->doc(self) < 0) {
395
+ sprintf(pos_str, "START");
396
+ }
397
+ else {
398
+ if (self->doc(self) == INT_MAX) {
399
+ sprintf(pos_str, "END");
400
+ }
401
+ else {
402
+ pos = SpTEn(self)->position;
403
+ sprintf(pos_str, "%d", self->doc(self) - pos);
404
+ }
405
+ }
406
+ sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
407
+ free(query_str);
408
+ return str;
409
+ }
410
+
411
+ static void spante_destroy(SpanEnum *self)
412
+ {
413
+ TermDocEnum *tde = SpTEn(self)->positions;
414
+ tde->close(tde);
415
+ free(self);
416
+ }
417
+
418
+ static SpanEnum *spante_new(Query *query, IndexReader *ir)
419
+ {
420
+ char *term = SpTQ(query)->term;
421
+ char *field = SpQ(query)->field;
422
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanTermEnum));
423
+
424
+ SpTEn(self)->positions = ir_term_positions_for(ir, field, term);
425
+ SpTEn(self)->position = -1;
426
+ SpTEn(self)->doc = -1;
427
+ SpTEn(self)->count = 0;
428
+ SpTEn(self)->freq = 0;
429
+
430
+ self->query = query;
431
+ self->next = &spante_next;
432
+ self->skip_to = &spante_skip_to;
433
+ self->doc = &spante_doc;
434
+ self->start = &spante_start;
435
+ self->end = &spante_end;
436
+ self->destroy = &spante_destroy;
437
+ self->to_s = &spante_to_s;
438
+
439
+ return self;
440
+ }
441
+
442
+ /*****************************************************************************
443
+ * SpanMultiTermEnum
444
+ *****************************************************************************/
445
+
446
+ /* * TermPosEnumWrapper * */
447
+ #define TPE_READ_SIZE 16
448
+
449
+ typedef struct TermPosEnumWrapper
450
+ {
451
+ const char *term;
452
+ TermDocEnum *tpe;
453
+ int doc;
454
+ int pos;
455
+ } TermPosEnumWrapper;
456
+
457
+ static bool tpew_less_than(const TermPosEnumWrapper *tpew1,
458
+ const TermPosEnumWrapper *tpew2)
459
+ {
460
+ return (tpew1->doc < tpew2->doc)
461
+ || (tpew1->doc == tpew2->doc && tpew1->pos < tpew2->pos);
462
+ }
463
+
464
+ static bool tpew_next(TermPosEnumWrapper *self)
465
+ {
466
+ TermDocEnum *tpe = self->tpe;
467
+ if (0 > (self->pos = tpe->next_position(tpe))) {
468
+ if (!tpe->next(tpe)) return false;
469
+ self->doc = tpe->doc_num(tpe);
470
+ self->pos = tpe->next_position(tpe);
471
+ }
472
+ return true;
473
+ }
474
+
475
+ static bool tpew_skip_to(TermPosEnumWrapper *self, int doc_num)
476
+ {
477
+ TermDocEnum *tpe = self->tpe;
478
+
479
+ if (tpe->skip_to(tpe, doc_num)) {
480
+ self->doc = tpe->doc_num(tpe);
481
+ self->pos = tpe->next_position(tpe);
482
+ return true;
483
+ }
484
+ else {
485
+ return false;
486
+ }
487
+ }
488
+
489
+ static void tpew_destroy(TermPosEnumWrapper *self)
490
+ {
491
+ self->tpe->close(self->tpe);
492
+ free(self);
493
+ }
494
+
495
+ static TermPosEnumWrapper *tpew_new(const char *term, TermDocEnum *tpe)
496
+ {
497
+ TermPosEnumWrapper *self = ALLOC_AND_ZERO(TermPosEnumWrapper);
498
+ self->term = term;
499
+ self->tpe = tpe;
500
+ self->doc = -1;
501
+ self->pos = -1;
502
+ return self;
503
+ }
504
+ #define SpMTEn(span_enum) ((SpanMultiTermEnum *)(span_enum))
505
+ #define SpMTQ(query) ((SpanMultiTermQuery *)(query))
506
+
507
+ typedef struct SpanMultiTermEnum
508
+ {
509
+ SpanEnum super;
510
+ PriorityQueue *tpew_pq;
511
+ TermPosEnumWrapper **tpews;
512
+ int tpew_cnt;
513
+ int pos;
514
+ int doc;
515
+ } SpanMultiTermEnum;
516
+
517
+ static bool spanmte_next(SpanEnum *self)
518
+ {
519
+ int curr_doc, curr_pos;
520
+ TermPosEnumWrapper *tpew;
521
+ SpanMultiTermEnum *mte = SpMTEn(self);
522
+ PriorityQueue *tpew_pq = mte->tpew_pq;
523
+ if (tpew_pq == NULL) {
524
+ TermPosEnumWrapper **tpews = mte->tpews;
525
+ int i;
526
+ tpew_pq = pq_new(mte->tpew_cnt, (lt_ft)tpew_less_than, (free_ft)NULL);
527
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
528
+ if (tpew_next(tpews[i])) {
529
+ pq_push(tpew_pq, tpews[i]);
530
+ }
531
+ }
532
+ mte->tpew_pq = tpew_pq;
533
+ }
534
+
535
+ tpew = (TermPosEnumWrapper *)pq_top(tpew_pq);
536
+ if (tpew == NULL) {
537
+ return false;
538
+ }
539
+
540
+ mte->doc = curr_doc = tpew->doc;
541
+ mte->pos = curr_pos = tpew->pos;
542
+
543
+ do {
544
+ if (tpew_next(tpew)) {
545
+ pq_down(tpew_pq);
546
+ }
547
+ else {
548
+ pq_pop(tpew_pq);
549
+ }
550
+ } while (((tpew = (TermPosEnumWrapper *)pq_top(tpew_pq)) != NULL)
551
+ && tpew->doc == curr_doc && tpew->pos == curr_pos);
552
+ return true;
553
+ }
554
+
555
+ static bool spanmte_skip_to(SpanEnum *self, int target)
556
+ {
557
+ SpanMultiTermEnum *mte = SpMTEn(self);
558
+ PriorityQueue *tpew_pq = mte->tpew_pq;
559
+ TermPosEnumWrapper *tpew;
560
+ if (tpew_pq == NULL) {
561
+ TermPosEnumWrapper **tpews = mte->tpews;
562
+ int i;
563
+ tpew_pq = pq_new(mte->tpew_cnt, (lt_ft)tpew_less_than, (free_ft)NULL);
564
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
565
+ tpew_skip_to(tpews[i], target);
566
+ pq_push(tpew_pq, tpews[i]);
567
+ }
568
+ mte->tpew_pq = tpew_pq;
569
+ }
570
+ if (tpew_pq->size == 0) {
571
+ mte->doc = -1;
572
+ return false;
573
+ }
574
+ while ((tpew = (TermPosEnumWrapper *)pq_top(tpew_pq)) != NULL
575
+ && (target > tpew->doc)) {
576
+ if (tpew_skip_to(tpew, target)) {
577
+ pq_down(tpew_pq);
578
+ }
579
+ else {
580
+ pq_pop(tpew_pq);
581
+ }
582
+ }
583
+ return spanmte_next(self);
584
+ }
585
+
586
+ static int spanmte_doc(SpanEnum *self)
587
+ {
588
+ return SpMTEn(self)->doc;
589
+ }
590
+
591
+ static int spanmte_start(SpanEnum *self)
592
+ {
593
+ return SpMTEn(self)->pos;
594
+ }
595
+
596
+ static int spanmte_end(SpanEnum *self)
597
+ {
598
+ return SpMTEn(self)->pos + 1;
599
+ }
600
+
601
+ static void spanmte_destroy(SpanEnum *self)
602
+ {
603
+ SpanMultiTermEnum *mte = SpMTEn(self);
604
+ int i;
605
+ if (mte->tpew_pq) pq_destroy(mte->tpew_pq);
606
+ for (i = 0; i < mte->tpew_cnt; i++) {
607
+ tpew_destroy(mte->tpews[i]);
608
+ }
609
+ free(mte->tpews);
610
+ free(self);
611
+ }
612
+
613
+ static SpanEnum *spanmte_new(Query *query, IndexReader *ir)
614
+ {
615
+ char *field = SpQ(query)->field;
616
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanMultiTermEnum));
617
+ SpanMultiTermEnum *smte = SpMTEn(self);
618
+ SpanMultiTermQuery *smtq = SpMTQ(query);
619
+ int i;
620
+
621
+
622
+ smte->tpews = ALLOC_N(TermPosEnumWrapper *, smtq->term_cnt);
623
+ for (i = 0; i < smtq->term_cnt; i++) {
624
+ char *term = smtq->terms[i];
625
+ smte->tpews[i] = tpew_new(term, ir_term_positions_for(ir, field, term));
626
+ }
627
+ smte->tpew_cnt = smtq->term_cnt;
628
+ smte->tpew_pq = NULL;
629
+ smte->pos = -1;
630
+ smte->doc = -1;
631
+
632
+ self->query = query;
633
+ self->next = &spanmte_next;
634
+ self->skip_to = &spanmte_skip_to;
635
+ self->doc = &spanmte_doc;
636
+ self->start = &spanmte_start;
637
+ self->end = &spanmte_end;
638
+ self->destroy = &spanmte_destroy;
639
+ self->to_s = &spante_to_s;
640
+
641
+ return self;
642
+ }
643
+
644
+
645
+ /*****************************************************************************
646
+ * SpanFirstEnum
647
+ *****************************************************************************/
648
+
649
+ #define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
650
+ #define SpFQ(query) ((SpanFirstQuery *)(query))
651
+
652
+ typedef struct SpanFirstEnum
653
+ {
654
+ SpanEnum super;
655
+ SpanEnum *sub_enum;
656
+ } SpanFirstEnum;
657
+
658
+
659
+ static bool spanfe_next(SpanEnum *self)
660
+ {
661
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
662
+ int end = SpFQ(self->query)->end;
663
+ while (sub_enum->next(sub_enum)) { /* scan to next match */
664
+ if (sub_enum->end(sub_enum) <= end) {
665
+ return true;
666
+ }
667
+ }
668
+ return false;
669
+ }
670
+
671
+ static bool spanfe_skip_to(SpanEnum *self, int target)
672
+ {
673
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
674
+ int end = SpFQ(self->query)->end;
675
+
676
+ if (! sub_enum->skip_to(sub_enum, target)) {
677
+ return false;
678
+ }
679
+
680
+ if (sub_enum->end(sub_enum) <= end) { /* there is a match */
681
+ return true;
682
+ }
683
+
684
+ return sub_enum->next(sub_enum); /* scan to next match */
685
+ }
686
+
687
+ static int spanfe_doc(SpanEnum *self)
688
+ {
689
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
690
+ return sub_enum->doc(sub_enum);
691
+ }
692
+
693
+ static int spanfe_start(SpanEnum *self)
694
+ {
695
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
696
+ return sub_enum->start(sub_enum);
697
+ }
698
+
699
+ static int spanfe_end(SpanEnum *self)
700
+ {
701
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
702
+ return sub_enum->end(sub_enum);
703
+ }
704
+
705
+ static char *spanfe_to_s(SpanEnum *self)
706
+ {
707
+ char *field = SpQ(self->query)->field;
708
+ char *query_str = self->query->to_s(self->query, field);
709
+ char *res = strfmt("SpanFirstEnum(%s)", query_str);
710
+ free(query_str);
711
+ return res;
712
+ }
713
+
714
+ static void spanfe_destroy(SpanEnum *self)
715
+ {
716
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
717
+ sub_enum->destroy(sub_enum);
718
+ free(self);
719
+ }
720
+
721
+ static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
722
+ {
723
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanFirstEnum));
724
+ SpanFirstQuery *sfq = SpFQ(query);
725
+
726
+ SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
727
+
728
+ self->query = query;
729
+ self->next = &spanfe_next;
730
+ self->skip_to = &spanfe_skip_to;
731
+ self->doc = &spanfe_doc;
732
+ self->start = &spanfe_start;
733
+ self->end = &spanfe_end;
734
+ self->destroy = &spanfe_destroy;
735
+ self->to_s = &spanfe_to_s;
736
+
737
+ return self;
738
+ }
739
+
740
+
741
+ /*****************************************************************************
742
+ * SpanOrEnum
743
+ *****************************************************************************/
744
+
745
+ #define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
746
+ #define SpOQ(query) ((SpanOrQuery *)(query))
747
+
748
+ typedef struct SpanOrEnum
749
+ {
750
+ SpanEnum super;
751
+ PriorityQueue *queue;
752
+ SpanEnum **span_enums;
753
+ int s_cnt;
754
+ bool first_time : 1;
755
+ } SpanOrEnum;
756
+
757
+
758
+ static bool span_less_than(SpanEnum *s1, SpanEnum *s2)
759
+ {
760
+ int doc_diff, start_diff;
761
+ doc_diff = s1->doc(s1) - s2->doc(s2);
762
+ if (doc_diff == 0) {
763
+ start_diff = s1->start(s1) - s2->start(s2);
764
+ if (start_diff == 0) {
765
+ return s1->end(s1) < s2->end(s2);
766
+ }
767
+ else {
768
+ return start_diff < 0;
769
+ }
770
+ }
771
+ else {
772
+ return doc_diff < 0;
773
+ }
774
+ }
775
+
776
+ static bool spanoe_next(SpanEnum *self)
777
+ {
778
+ SpanOrEnum *soe = SpOEn(self);
779
+ SpanEnum *se;
780
+ int i;
781
+
782
+ if (soe->first_time) { /* first time -- initialize */
783
+ for (i = 0; i < soe->s_cnt; i++) {
784
+ se = soe->span_enums[i];
785
+ if (se->next(se)) { /* move to first entry */
786
+ pq_push(soe->queue, se);
787
+ }
788
+ }
789
+ soe->first_time = false;
790
+ return soe->queue->size != 0;
791
+ }
792
+
793
+ if (soe->queue->size == 0) {
794
+ return false; /* all done */
795
+ }
796
+
797
+ se = (SpanEnum *)pq_top(soe->queue);
798
+ if (se->next(se)) { /* move to next */
799
+ pq_down(soe->queue);
800
+ return true;
801
+ }
802
+
803
+ pq_pop(soe->queue); /* exhausted a clause */
804
+
805
+ return soe->queue->size != 0;
806
+ }
807
+
808
+ static bool spanoe_skip_to(SpanEnum *self, int target)
809
+ {
810
+ SpanOrEnum *soe = SpOEn(self);
811
+ SpanEnum *se;
812
+ int i;
813
+
814
+ if (soe->first_time) { /* first time -- initialize */
815
+ for (i = 0; i < soe->s_cnt; i++) {
816
+ se = soe->span_enums[i];
817
+ if (se->skip_to(se, target)) {/* move to target */
818
+ pq_push(soe->queue, se);
819
+ }
820
+ }
821
+ soe->first_time = false;
822
+ }
823
+ else {
824
+ while ((soe->queue->size != 0) &&
825
+ ((se = (SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
826
+ if (se->skip_to(se, target)) {
827
+ pq_down(soe->queue);
828
+ }
829
+ else {
830
+ pq_pop(soe->queue);
831
+ }
832
+ }
833
+ }
834
+
835
+ return soe->queue->size != 0;
836
+ }
837
+
838
+ #define SpOEn_Top_SE(self) (SpanEnum *)pq_top(SpOEn(self)->queue)
839
+
840
+ static int spanoe_doc(SpanEnum *self)
841
+ {
842
+ SpanEnum *se = SpOEn_Top_SE(self);
843
+ return se->doc(se);
844
+ }
845
+
846
+ static int spanoe_start(SpanEnum *self)
847
+ {
848
+ SpanEnum *se = SpOEn_Top_SE(self);
849
+ return se->start(se);
850
+ }
851
+
852
+ static int spanoe_end(SpanEnum *self)
853
+ {
854
+ SpanEnum *se = SpOEn_Top_SE(self);
855
+ return se->end(se);
856
+ }
857
+
858
+ static char *spanoe_to_s(SpanEnum *self)
859
+ {
860
+ SpanOrEnum *soe = SpOEn(self);
861
+ char *field = SpQ(self->query)->field;
862
+ char *query_str = self->query->to_s(self->query, field);
863
+ char doc_str[62];
864
+ size_t len = strlen(query_str);
865
+ char *str = ALLOC_N(char, len + 80);
866
+
867
+ if (soe->first_time) {
868
+ sprintf(doc_str, "START");
869
+ }
870
+ else {
871
+ if (soe->queue->size == 0) {
872
+ sprintf(doc_str, "END");
873
+ }
874
+ else {
875
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
876
+ self->start(self), self->end(self));
877
+ }
878
+ }
879
+ sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
880
+ free(query_str);
881
+ return str;
882
+ }
883
+
884
+ static void spanoe_destroy(SpanEnum *self)
885
+ {
886
+ SpanEnum *se;
887
+ SpanOrEnum *soe = SpOEn(self);
888
+ int i;
889
+ pq_destroy(soe->queue);
890
+ for (i = 0; i < soe->s_cnt; i++) {
891
+ se = soe->span_enums[i];
892
+ se->destroy(se);
893
+ }
894
+ free(soe->span_enums);
895
+ free(self);
896
+ }
897
+
898
+ SpanEnum *spanoe_new(Query *query, IndexReader *ir)
899
+ {
900
+ Query *clause;
901
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanOrEnum));
902
+ SpanOrQuery *soq = SpOQ(query);
903
+ int i;
904
+
905
+ SpOEn(self)->first_time = true;
906
+ SpOEn(self)->s_cnt = soq->c_cnt;
907
+ SpOEn(self)->span_enums = ALLOC_N(SpanEnum *, SpOEn(self)->s_cnt);
908
+
909
+ for (i = 0; i < SpOEn(self)->s_cnt; i++) {
910
+ clause = soq->clauses[i];
911
+ SpOEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
912
+ }
913
+
914
+ SpOEn(self)->queue = pq_new(SpOEn(self)->s_cnt, (lt_ft)&span_less_than,
915
+ (free_ft)NULL);
916
+
917
+ self->query = query;
918
+ self->next = &spanoe_next;
919
+ self->skip_to = &spanoe_skip_to;
920
+ self->doc = &spanoe_doc;
921
+ self->start = &spanoe_start;
922
+ self->end = &spanoe_end;
923
+ self->destroy = &spanoe_destroy;
924
+ self->to_s = &spanoe_to_s;
925
+
926
+ return self;
927
+ }
928
+
929
+ /*****************************************************************************
930
+ * SpanNearEnum
931
+ *****************************************************************************/
932
+
933
+ #define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
934
+ #define SpNQ(query) ((SpanNearQuery *)(query))
935
+
936
+ typedef struct SpanNearEnum
937
+ {
938
+ SpanEnum super;
939
+ SpanEnum **span_enums;
940
+ int s_cnt;
941
+ int slop;
942
+ int current;
943
+ int doc;
944
+ int start;
945
+ int end;
946
+ bool first_time : 1;
947
+ bool in_order : 1;
948
+ } SpanNearEnum;
949
+
950
+
951
/*
 * Step `sne->current` to the next clause enumerator (wrapping around at
 * s_cnt) and load that enumerator into `se`. Expects the locals `sne` and
 * `se` to be in scope at the point of use.
 *
 * The expansion deliberately has no trailing semicolon: the call site
 * supplies it, which keeps the macro usable as a single statement, for
 * example in an unbraced if/else.
 */
#define SpNEn_NEXT() do {\
    sne->current = (sne->current+1) % sne->s_cnt;\
    se = sne->span_enums[sne->current];\
} while (0)
955
+
956
+ static bool sne_init(SpanNearEnum *sne)
957
+ {
958
+ SpanEnum *se = sne->span_enums[sne->current];
959
+ int prev_doc = se->doc(se);
960
+ int i;
961
+
962
+ for (i = 1; i < sne->s_cnt; i++) {
963
+ SpNEn_NEXT();
964
+ if (!se->skip_to(se, prev_doc)) {
965
+ return false;
966
+ }
967
+ prev_doc = se->doc(se);
968
+ }
969
+ return true;
970
+ }
971
+
972
+ static bool sne_goto_next_doc(SpanNearEnum *sne)
973
+ {
974
+ SpanEnum *se = sne->span_enums[sne->current];
975
+ int prev_doc = se->doc(se);
976
+
977
+ SpNEn_NEXT();
978
+
979
+ while (se->doc(se) < prev_doc) {
980
+ if (! se->skip_to(se, prev_doc)) {
981
+ return false;
982
+ }
983
+ prev_doc = se->doc(se);
984
+ SpNEn_NEXT();
985
+ }
986
+ return true;
987
+ }
988
+
989
+ static bool sne_next_unordered_match(SpanEnum *self)
990
+ {
991
+ SpanNearEnum *sne = SpNEn(self);
992
+ SpanEnum *se, *min_se = NULL;
993
+ int i;
994
+ int max_end, end, min_start, start, doc;
995
+ int lengths_sum;
996
+
997
+ while (true) {
998
+ max_end = 0;
999
+ min_start = INT_MAX;
1000
+ lengths_sum = 0;
1001
+
1002
+ for (i = 0; i < sne->s_cnt; i++) {
1003
+ se = sne->span_enums[i];
1004
+ if ((end=se->end(se)) > max_end) {
1005
+ max_end = end;
1006
+ }
1007
+ if ((start=se->start(se)) < min_start) {
1008
+ min_start = start;
1009
+ min_se = se;
1010
+ sne->current = i; /* current should point to the minimum span */
1011
+ }
1012
+ lengths_sum += end - start;
1013
+ }
1014
+
1015
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
1016
+ /* we have a match */
1017
+ sne->start = min_start;
1018
+ sne->end = max_end;
1019
+ sne->doc = min_se->doc(min_se);
1020
+ return true;
1021
+ }
1022
+
1023
+ /* increment the minimum span_enum and try again */
1024
+ doc = min_se->doc(min_se);
1025
+ if (!min_se->next(min_se)) {
1026
+ return false;
1027
+ }
1028
+ if (doc < min_se->doc(min_se)) {
1029
+ if (!sne_goto_next_doc(sne)) return false;
1030
+ }
1031
+ }
1032
+ }
1033
+
1034
+ static bool sne_next_ordered_match(SpanEnum *self)
1035
+ {
1036
+ SpanNearEnum *sne = SpNEn(self);
1037
+ SpanEnum *se;
1038
+ int i;
1039
+ int prev_doc, prev_start, prev_end;
1040
+ int doc=0, start=0, end=0;
1041
+ int lengths_sum;
1042
+
1043
+ while (true) {
1044
+ se = sne->span_enums[0];
1045
+
1046
+ prev_doc = se->doc(se);
1047
+ sne->start = prev_start = se->start(se);
1048
+ prev_end = se->end(se);
1049
+
1050
+ i = 1;
1051
+ lengths_sum = prev_end - prev_start;
1052
+
1053
+ while (i < sne->s_cnt) {
1054
+ se = sne->span_enums[i];
1055
+ doc = se->doc(se);
1056
+ start = se->start(se);
1057
+ end = se->end(se);
1058
+ while ((doc == prev_doc) && ((start < prev_start) ||
1059
+ ((start == prev_start) && (end < prev_end)))) {
1060
+ if (!se->next(se)) {
1061
+ return false;
1062
+ }
1063
+ doc = se->doc(se);
1064
+ start = se->start(se);
1065
+ end = se->end(se);
1066
+ }
1067
+ if (doc != prev_doc) {
1068
+ sne->current = i;
1069
+ if (!sne_goto_next_doc(sne)) {
1070
+ return false;
1071
+ }
1072
+ break;
1073
+ }
1074
+ i++;
1075
+ lengths_sum += end - start;
1076
+ prev_doc = doc;
1077
+ prev_start = start;
1078
+ prev_end = end;
1079
+ }
1080
+ if (i == sne->s_cnt) {
1081
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
1082
+ /* we have a match */
1083
+ sne->end = end;
1084
+ sne->doc = doc;
1085
+
1086
+ /* the minimum span is always the first span so it needs to be
1087
+ * incremented next time around */
1088
+ sne->current = 0;
1089
+ return true;
1090
+
1091
+ }
1092
+ else {
1093
+ se = sne->span_enums[0];
1094
+ if (!se->next(se)) {
1095
+ return false;
1096
+ }
1097
+ if (se->doc(se) != prev_doc) {
1098
+ sne->current = 0;
1099
+ if (!sne_goto_next_doc(sne)) {
1100
+ return false;
1101
+ }
1102
+ }
1103
+ }
1104
+ }
1105
+ }
1106
+ }
1107
+
1108
+ static bool sne_next_match(SpanEnum *self)
1109
+ {
1110
+ SpanNearEnum *sne = SpNEn(self);
1111
+ SpanEnum *se_curr, *se_next;
1112
+
1113
+ if (!sne->first_time) {
1114
+ if (!sne_init(sne)) {
1115
+ return false;
1116
+ }
1117
+ sne->first_time = false;
1118
+ }
1119
+ se_curr = sne->span_enums[sne->current];
1120
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
1121
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
1122
+ if (!sne_goto_next_doc(sne)) {
1123
+ return false;
1124
+ }
1125
+ }
1126
+
1127
+ if (sne->in_order) {
1128
+ return sne_next_ordered_match(self);
1129
+ }
1130
+ else {
1131
+ return sne_next_unordered_match(self);
1132
+ }
1133
+ }
1134
+
1135
+ static bool spanne_next(SpanEnum *self)
1136
+ {
1137
+ SpanNearEnum *sne = SpNEn(self);
1138
+ SpanEnum *se;
1139
+
1140
+ se = sne->span_enums[sne->current];
1141
+ if (!se->next(se)) return false;
1142
+
1143
+ return sne_next_match(self);
1144
+ }
1145
+
1146
+ static bool spanne_skip_to(SpanEnum *self, int target)
1147
+ {
1148
+ SpanEnum *se = SpNEn(self)->span_enums[SpNEn(self)->current];
1149
+ if (!se->skip_to(se, target)) {
1150
+ return false;
1151
+ }
1152
+
1153
+ return sne_next_match(self);
1154
+ }
1155
+
1156
+ static int spanne_doc(SpanEnum *self)
1157
+ {
1158
+ return SpNEn(self)->doc;
1159
+ }
1160
+
1161
+ static int spanne_start(SpanEnum *self)
1162
+ {
1163
+ return SpNEn(self)->start;
1164
+ }
1165
+
1166
+ static int spanne_end(SpanEnum *self)
1167
+ {
1168
+ return SpNEn(self)->end;
1169
+ }
1170
+
1171
+ static char *spanne_to_s(SpanEnum *self)
1172
+ {
1173
+ SpanNearEnum *sne = SpNEn(self);
1174
+ char *field = SpQ(self->query)->field;
1175
+ char *query_str = self->query->to_s(self->query, field);
1176
+ char doc_str[62];
1177
+ size_t len = strlen(query_str);
1178
+ char *str = ALLOC_N(char, len + 80);
1179
+
1180
+ if (sne->first_time) {
1181
+ sprintf(doc_str, "START");
1182
+ }
1183
+ else {
1184
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
1185
+ self->start(self), self->end(self));
1186
+ }
1187
+ sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
1188
+ free(query_str);
1189
+ return str;
1190
+ }
1191
+
1192
+ static void spanne_destroy(SpanEnum *self)
1193
+ {
1194
+ SpanEnum *se;
1195
+ SpanNearEnum *sne = SpNEn(self);
1196
+ int i;
1197
+ for (i = 0; i < sne->s_cnt; i++) {
1198
+ se = sne->span_enums[i];
1199
+ se->destroy(se);
1200
+ }
1201
+ free(sne->span_enums);
1202
+ free(self);
1203
+ }
1204
+
1205
+ static SpanEnum *spanne_new(Query *query, IndexReader *ir)
1206
+ {
1207
+ int i;
1208
+ Query *clause;
1209
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNearEnum));
1210
+ SpanNearQuery *snq = SpNQ(query);
1211
+
1212
+ SpNEn(self)->first_time = true;
1213
+ SpNEn(self)->in_order = snq->in_order;
1214
+ SpNEn(self)->slop = snq->slop;
1215
+ SpNEn(self)->s_cnt = snq->c_cnt;
1216
+ SpNEn(self)->span_enums = ALLOC_N(SpanEnum *, SpNEn(self)->s_cnt);
1217
+
1218
+ for (i = 0; i < SpNEn(self)->s_cnt; i++) {
1219
+ clause = snq->clauses[i];
1220
+ SpNEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
1221
+ }
1222
+ SpNEn(self)->current = 0;
1223
+
1224
+ SpNEn(self)->doc = -1;
1225
+ SpNEn(self)->start = -1;
1226
+ SpNEn(self)->end = -1;
1227
+
1228
+ self->query = query;
1229
+ self->next = &spanne_next;
1230
+ self->skip_to = &spanne_skip_to;
1231
+ self->doc = &spanne_doc;
1232
+ self->start = &spanne_start;
1233
+ self->end = &spanne_end;
1234
+ self->destroy = &spanne_destroy;
1235
+ self->to_s = &spanne_to_s;
1236
+
1237
+ return self;
1238
+ }
1239
+
1240
+ /*****************************************************************************
1241
+ *
1242
+ * SpanNotEnum
1243
+ *
1244
+ *****************************************************************************/
1245
+
1246
+ #define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
1247
+ #define SpXQ(query) ((SpanNotQuery *)(query))
1248
+
1249
+ typedef struct SpanNotEnum
1250
+ {
1251
+ SpanEnum super;
1252
+ SpanEnum *inc;
1253
+ SpanEnum *exc;
1254
+ bool more_inc : 1;
1255
+ bool more_exc : 1;
1256
+ } SpanNotEnum;
1257
+
1258
+
1259
+ static bool spanxe_next(SpanEnum *self)
1260
+ {
1261
+ SpanNotEnum *sxe = SpXEn(self);
1262
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1263
+ if (sxe->more_inc) { /* move to next incl */
1264
+ sxe->more_inc = inc->next(inc);
1265
+ }
1266
+
1267
+ while (sxe->more_inc && sxe->more_exc) {
1268
+ if (inc->doc(inc) > exc->doc(exc)) { /* skip excl */
1269
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
1270
+ }
1271
+
1272
+ while (sxe->more_exc /* while excl is before */
1273
+ && (inc->doc(inc) == exc->doc(exc))
1274
+ && (exc->end(exc) <= inc->start(inc))) {
1275
+ sxe->more_exc = exc->next(exc); /* increment excl */
1276
+ }
1277
+
1278
+ if (! sxe->more_exc || /* if no intersection */
1279
+ (inc->doc(inc) != exc->doc(exc)) ||
1280
+ inc->end(inc) <= exc->start(exc)) {
1281
+ break; /* we found a match */
1282
+ }
1283
+
1284
+ sxe->more_inc = inc->next(inc); /* intersected: keep scanning */
1285
+ }
1286
+ return sxe->more_inc;
1287
+ }
1288
+
1289
+ static bool spanxe_skip_to(SpanEnum *self, int target)
1290
+ {
1291
+ SpanNotEnum *sxe = SpXEn(self);
1292
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1293
+ int doc;
1294
+
1295
+ if (sxe->more_inc) { /* move to next incl */
1296
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
1297
+ }
1298
+
1299
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
1300
+ sxe->more_exc = exc->skip_to(exc, doc);
1301
+ }
1302
+
1303
+ while (sxe->more_exc /* while excl is before */
1304
+ && inc->doc(inc) == exc->doc(exc)
1305
+ && exc->end(exc) <= inc->start(inc)) {
1306
+ sxe->more_exc = exc->next(exc); /* increment excl */
1307
+ }
1308
+
1309
+ if (!sxe->more_exc || /* if no intersection */
1310
+ inc->doc(inc) != exc->doc(exc) ||
1311
+ inc->end(inc) <= exc->start(exc)) {
1312
+ return true; /* we found a match */
1313
+ }
1314
+
1315
+ return spanxe_next(self); /* scan to next match */
1316
+ }
1317
+
1318
+ static int spanxe_doc(SpanEnum *self)
1319
+ {
1320
+ SpanEnum *inc = SpXEn(self)->inc;
1321
+ return inc->doc(inc);
1322
+ }
1323
+
1324
+ static int spanxe_start(SpanEnum *self)
1325
+ {
1326
+ SpanEnum *inc = SpXEn(self)->inc;
1327
+ return inc->start(inc);
1328
+ }
1329
+
1330
+ static int spanxe_end(SpanEnum *self)
1331
+ {
1332
+ SpanEnum *inc = SpXEn(self)->inc;
1333
+ return inc->end(inc);
1334
+ }
1335
+
1336
+ static char *spanxe_to_s(SpanEnum *self)
1337
+ {
1338
+ char *field = SpQ(self->query)->field;
1339
+ char *query_str = self->query->to_s(self->query, field);
1340
+ char *res = strfmt("SpanNotEnum(%s)", query_str);
1341
+ free(query_str);
1342
+ return res;
1343
+ }
1344
+
1345
+ static void spanxe_destroy(SpanEnum *self)
1346
+ {
1347
+ SpanNotEnum *sxe = SpXEn(self);
1348
+ sxe->inc->destroy(sxe->inc);
1349
+ sxe->exc->destroy(sxe->exc);
1350
+ free(self);
1351
+ }
1352
+
1353
+ static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
1354
+ {
1355
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNotEnum));
1356
+ SpanNotEnum *sxe = SpXEn(self);
1357
+ SpanNotQuery *sxq = SpXQ(query);
1358
+
1359
+ sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
1360
+ sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
1361
+ sxe->more_inc = true;
1362
+ sxe->more_exc = sxe->exc->next(sxe->exc);
1363
+
1364
+ self->query = query;
1365
+ self->next = &spanxe_next;
1366
+ self->skip_to = &spanxe_skip_to;
1367
+ self->doc = &spanxe_doc;
1368
+ self->start = &spanxe_start;
1369
+ self->end = &spanxe_end;
1370
+ self->destroy = &spanxe_destroy;
1371
+ self->to_s = &spanxe_to_s;
1372
+
1373
+ return self;
1374
+ }
1375
+
1376
+ /*****************************************************************************
1377
+ *
1378
+ * SpanWeight
1379
+ *
1380
+ *****************************************************************************/
1381
+
1382
+ #define SpW(weight) ((SpanWeight *)(weight))
1383
+ typedef struct SpanWeight
1384
+ {
1385
+ Weight super;
1386
+ HashSet *terms;
1387
+ } SpanWeight;
1388
+
1389
+ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
1390
+ {
1391
+ Explanation *expl;
1392
+ Explanation *idf_expl1;
1393
+ Explanation *idf_expl2;
1394
+ Explanation *query_expl;
1395
+ Explanation *qnorm_expl;
1396
+ Explanation *field_expl;
1397
+ Explanation *tf_expl;
1398
+ Scorer *scorer;
1399
+ uchar *field_norms;
1400
+ float field_norm;
1401
+ Explanation *field_norm_expl;
1402
+
1403
+ char *query_str;
1404
+ HashSet *terms = SpW(self)->terms;
1405
+ char *field = SpQ(self->query)->field;
1406
+ const int field_num = fis_get_field_num(ir->fis, field);
1407
+ char *doc_freqs = NULL;
1408
+ size_t df_i = 0;
1409
+ int i;
1410
+
1411
+ if (field_num < 0) {
1412
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
1413
+ }
1414
+
1415
+ query_str = self->query->to_s(self->query, "");
1416
+
1417
+ for (i = 0; i < terms->size; i++) {
1418
+ char *term = (char *)terms->elems[i];
1419
+ REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
1420
+ sprintf(doc_freqs + df_i, "%s=%d, ", term,
1421
+ ir->doc_freq(ir, field_num, term));
1422
+ df_i = strlen(doc_freqs);
1423
+ }
1424
+ /* remove the ',' at the end of the string if it exists */
1425
+ if (terms->size > 0) {
1426
+ df_i -= 2;
1427
+ doc_freqs[df_i] = '\0';
1428
+ }
1429
+ else {
1430
+ doc_freqs = "";
1431
+ }
1432
+
1433
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
1434
+
1435
+ /* We need two of these as it's included in both the query explanation
1436
+ * and the field explanation */
1437
+ idf_expl1 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1438
+ idf_expl2 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1439
+ if (terms->size > 0) {
1440
+ free(doc_freqs); /* only free if allocated */
1441
+ }
1442
+
1443
+ /* explain query weight */
1444
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
1445
+
1446
+ if (self->query->boost != 1.0) {
1447
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
1448
+ }
1449
+
1450
+ expl_add_detail(query_expl, idf_expl1);
1451
+
1452
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
1453
+ expl_add_detail(query_expl, qnorm_expl);
1454
+
1455
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
1456
+
1457
+ expl_add_detail(expl, query_expl);
1458
+
1459
+ /* explain field weight */
1460
+ field_expl = expl_new(0.0, "field_weight(%s:%s in %d), product of:",
1461
+ field, query_str, target);
1462
+ free(query_str);
1463
+
1464
+ scorer = self->scorer(self, ir);
1465
+ tf_expl = scorer->explain(scorer, target);
1466
+ scorer->destroy(scorer);
1467
+ expl_add_detail(field_expl, tf_expl);
1468
+ expl_add_detail(field_expl, idf_expl2);
1469
+
1470
+ field_norms = ir->get_norms(ir, field_num);
1471
+ field_norm = (field_norms
1472
+ ? sim_decode_norm(self->similarity, field_norms[target])
1473
+ : (float)0.0);
1474
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
1475
+ field, target);
1476
+ expl_add_detail(field_expl, field_norm_expl);
1477
+
1478
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
1479
+
1480
+ /* combine them */
1481
+ if (query_expl->value == 1.0) {
1482
+ expl_destroy(expl);
1483
+ return field_expl;
1484
+ }
1485
+ else {
1486
+ expl->value = (query_expl->value * field_expl->value);
1487
+ expl_add_detail(expl, field_expl);
1488
+ return expl;
1489
+ }
1490
+ }
1491
+
1492
+ static char *spanw_to_s(Weight *self)
1493
+ {
1494
+ return strfmt("SpanWeight(%f)", self->value);
1495
+ }
1496
+
1497
+ static void spanw_destroy(Weight *self)
1498
+ {
1499
+ hs_destroy(SpW(self)->terms);
1500
+ w_destroy(self);
1501
+ }
1502
+
1503
+ static Weight *spanw_new(Query *query, Searcher *searcher)
1504
+ {
1505
+ int i;
1506
+ Weight *self = w_new(SpanWeight, query);
1507
+ HashSet *terms = SpQ(query)->get_terms(query);
1508
+
1509
+ SpW(self)->terms = terms;
1510
+ self->scorer = &spansc_new;
1511
+ self->explain = &spanw_explain;
1512
+ self->to_s = &spanw_to_s;
1513
+ self->destroy = &spanw_destroy;
1514
+
1515
+ self->similarity = query->get_similarity(query, searcher);
1516
+
1517
+ self->idf = 0.0;
1518
+
1519
+ for (i = terms->size - 1; i >= 0; i--) {
1520
+ self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
1521
+ (char *)terms->elems[i], searcher);
1522
+ }
1523
+
1524
+ return self;
1525
+ }
1526
+
1527
+ /*****************************************************************************
1528
+ * SpanTermQuery
1529
+ *****************************************************************************/
1530
+
1531
+ static char *spantq_to_s(Query *self, const char *field)
1532
+ {
1533
+ if (field == SpQ(self)->field) {
1534
+ return strfmt("span_terms(%s)", SpTQ(self)->term);
1535
+ }
1536
+ else {
1537
+ return strfmt("span_terms(%s:%s)", SpQ(self)->field, SpTQ(self)->term);
1538
+ }
1539
+ }
1540
+
1541
+ static void spantq_destroy_i(Query *self)
1542
+ {
1543
+ free(SpTQ(self)->term);
1544
+ free(SpQ(self)->field);
1545
+ spanq_destroy_i(self);
1546
+ }
1547
+
1548
+ static void spantq_extract_terms(Query *self, HashSet *terms)
1549
+ {
1550
+ hs_add(terms, term_new(SpQ(self)->field, SpTQ(self)->term));
1551
+ }
1552
+
1553
+ static HashSet *spantq_get_terms(Query *self)
1554
+ {
1555
+ HashSet *terms = hs_new_str(&free);
1556
+ hs_add(terms, estrdup(SpTQ(self)->term));
1557
+ return terms;
1558
+ }
1559
+
1560
+ static unsigned long spantq_hash(Query *self)
1561
+ {
1562
+ return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
1563
+ }
1564
+
1565
+ static int spantq_eq(Query *self, Query *o)
1566
+ {
1567
+ return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
1568
+ }
1569
+
1570
+ Query *spantq_new(const char *field, const char *term)
1571
+ {
1572
+ Query *self = q_new(SpanTermQuery);
1573
+
1574
+ SpTQ(self)->term = estrdup(term);
1575
+ SpQ(self)->field = estrdup(field);
1576
+ SpQ(self)->get_spans = &spante_new;
1577
+ SpQ(self)->get_terms = &spantq_get_terms;
1578
+
1579
+ self->type = SPAN_TERM_QUERY;
1580
+ self->extract_terms = &spantq_extract_terms;
1581
+ self->to_s = &spantq_to_s;
1582
+ self->hash = &spantq_hash;
1583
+ self->eq = &spantq_eq;
1584
+ self->destroy_i = &spantq_destroy_i;
1585
+ self->create_weight_i = &spanw_new;
1586
+ self->get_matchv_i = &spanq_get_matchv_i;
1587
+ return self;
1588
+ }
1589
+
1590
+ /*****************************************************************************
1591
+ * SpanMultiTermQuery
1592
+ *****************************************************************************/
1593
+
1594
+ static char *spanmtq_to_s(Query *self, const char *field)
1595
+ {
1596
+ char *terms = NULL, *p;
1597
+ int len = 2, i;
1598
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1599
+ for (i = 0; i < smtq->term_cnt; i++) {
1600
+ len += strlen(smtq->terms[i]) + 2;
1601
+ }
1602
+ p = terms = ALLOC_N(char, len);
1603
+ *(p++) = '[';
1604
+ for (i = 0; i < smtq->term_cnt; i++) {
1605
+ strcpy(p, smtq->terms[i]);
1606
+ p += strlen(smtq->terms[i]);
1607
+ *(p++) = ',';
1608
+ }
1609
+ if (p > terms) p--;
1610
+ *(p++) = ']';
1611
+ *p = '\0';
1612
+
1613
+ if (field == SpQ(self)->field) {
1614
+ p = strfmt("span_terms(%s)", terms);
1615
+ }
1616
+ else {
1617
+ p = strfmt("span_terms(%s:%s)", SpQ(self)->field, terms);
1618
+ }
1619
+ free(terms);
1620
+ return p;
1621
+ }
1622
+
1623
+ static void spanmtq_destroy_i(Query *self)
1624
+ {
1625
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1626
+ int i;
1627
+ for (i = 0; i < smtq->term_cnt; i++) {
1628
+ free(smtq->terms[i]);
1629
+ }
1630
+ free(smtq->terms);
1631
+ free(SpQ(self)->field);
1632
+ spanq_destroy_i(self);
1633
+ }
1634
+
1635
+ static void spanmtq_extract_terms(Query *self, HashSet *terms)
1636
+ {
1637
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1638
+ int i;
1639
+ for (i = 0; i < smtq->term_cnt; i++) {
1640
+ hs_add(terms, term_new(SpQ(self)->field, smtq->terms[i]));
1641
+ }
1642
+ }
1643
+
1644
+ static HashSet *spanmtq_get_terms(Query *self)
1645
+ {
1646
+ HashSet *terms = hs_new_str(&free);
1647
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1648
+ int i;
1649
+ for (i = 0; i < smtq->term_cnt; i++) {
1650
+ hs_add(terms, estrdup(smtq->terms[i]));
1651
+ }
1652
+ return terms;
1653
+ }
1654
+
1655
+ static unsigned long spanmtq_hash(Query *self)
1656
+ {
1657
+ unsigned long hash = spanq_hash(self);
1658
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1659
+ int i;
1660
+ for (i = 0; i < smtq->term_cnt; i++) {
1661
+ hash ^= str_hash(smtq->terms[i]);
1662
+ }
1663
+ return hash;
1664
+ }
1665
+
1666
+ static int spanmtq_eq(Query *self, Query *o)
1667
+ {
1668
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1669
+ SpanMultiTermQuery *smtqo = SpMTQ(o);
1670
+ int i;
1671
+ if (!spanq_eq(self, o)) return false;
1672
+ if (smtq->term_cnt != smtqo->term_cnt) return false;
1673
+ for (i = 0; i < smtq->term_cnt; i++) {
1674
+ if (strcmp(smtq->terms[i], smtqo->terms[i]) != 0) return false;
1675
+ }
1676
+ return true;;
1677
+ }
1678
+
1679
+ Query *spanmtq_new_conf(const char *field, int max_terms)
1680
+ {
1681
+ Query *self = q_new(SpanMultiTermQuery);
1682
+
1683
+ SpMTQ(self)->terms = ALLOC_N(char *, max_terms);
1684
+ SpMTQ(self)->term_cnt = 0;
1685
+ SpMTQ(self)->term_capa = max_terms;
1686
+
1687
+ SpQ(self)->field = estrdup(field);
1688
+ SpQ(self)->get_spans = &spanmte_new;
1689
+ SpQ(self)->get_terms = &spanmtq_get_terms;
1690
+
1691
+ self->type = SPAN_MULTI_TERM_QUERY;
1692
+ self->extract_terms = &spanmtq_extract_terms;
1693
+ self->to_s = &spanmtq_to_s;
1694
+ self->hash = &spanmtq_hash;
1695
+ self->eq = &spanmtq_eq;
1696
+ self->destroy_i = &spanmtq_destroy_i;
1697
+ self->create_weight_i = &spanw_new;
1698
+ self->get_matchv_i = &spanq_get_matchv_i;
1699
+
1700
+ return self;
1701
+ }
1702
+
1703
+ Query *spanmtq_new(const char *field)
1704
+ {
1705
+ return spanmtq_new_conf(field, SPAN_MULTI_TERM_QUERY_CAPA);
1706
+ }
1707
+
1708
+ void spanmtq_add_term(Query *self, const char *term)
1709
+ {
1710
+ SpanMultiTermQuery *smtq = SpMTQ(self);
1711
+ if (smtq->term_cnt < smtq->term_capa) {
1712
+ smtq->terms[smtq->term_cnt++] = estrdup(term);
1713
+ }
1714
+ }
1715
+
1716
+ /*****************************************************************************
1717
+ *
1718
+ * SpanFirstQuery
1719
+ *
1720
+ *****************************************************************************/
1721
+
1722
+ static char *spanfq_to_s(Query *self, const char *field)
1723
+ {
1724
+ Query *match = SpFQ(self)->match;
1725
+ char *q_str = match->to_s(match, field);
1726
+ char *res = strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
1727
+ free(q_str);
1728
+ return res;
1729
+ }
1730
+
1731
+ static void spanfq_extract_terms(Query *self, HashSet *terms)
1732
+ {
1733
+ SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
1734
+ }
1735
+
1736
+ static HashSet *spanfq_get_terms(Query *self)
1737
+ {
1738
+ SpanFirstQuery *sfq = SpFQ(self);
1739
+ return SpQ(sfq->match)->get_terms(sfq->match);
1740
+ }
1741
+
1742
+ static Query *spanfq_rewrite(Query *self, IndexReader *ir)
1743
+ {
1744
+ Query *q, *rq;
1745
+
1746
+ q = SpFQ(self)->match;
1747
+ rq = q->rewrite(q, ir);
1748
+ q_deref(q);
1749
+ SpFQ(self)->match = rq;
1750
+
1751
+ self->ref_cnt++;
1752
+ return self; /* no clauses rewrote */
1753
+ }
1754
+
1755
+ static void spanfq_destroy_i(Query *self)
1756
+ {
1757
+ q_deref(SpFQ(self)->match);
1758
+ free(SpQ(self)->field);
1759
+ spanq_destroy_i(self);
1760
+ }
1761
+
1762
+ static unsigned long spanfq_hash(Query *self)
1763
+ {
1764
+ return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1765
+ ^ SpFQ(self)->end;
1766
+ }
1767
+
1768
+ static int spanfq_eq(Query *self, Query *o)
1769
+ {
1770
+ SpanFirstQuery *sfq1 = SpFQ(self);
1771
+ SpanFirstQuery *sfq2 = SpFQ(o);
1772
+ return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
1773
+ && (sfq1->end == sfq2->end);
1774
+ }
1775
+
1776
+ Query *spanfq_new_nr(Query *match, int end)
1777
+ {
1778
+ Query *self = q_new(SpanFirstQuery);
1779
+
1780
+ SpFQ(self)->match = match;
1781
+ SpFQ(self)->end = end;
1782
+
1783
+ SpQ(self)->field = estrdup(SpQ(match)->field);
1784
+ SpQ(self)->get_spans = &spanfe_new;
1785
+ SpQ(self)->get_terms = &spanfq_get_terms;
1786
+
1787
+ self->type = SPAN_FIRST_QUERY;
1788
+ self->rewrite = &spanfq_rewrite;
1789
+ self->extract_terms = &spanfq_extract_terms;
1790
+ self->to_s = &spanfq_to_s;
1791
+ self->hash = &spanfq_hash;
1792
+ self->eq = &spanfq_eq;
1793
+ self->destroy_i = &spanfq_destroy_i;
1794
+ self->create_weight_i = &spanw_new;
1795
+ self->get_matchv_i = &spanq_get_matchv_i;
1796
+
1797
+ return self;
1798
+ }
1799
+
1800
+ Query *spanfq_new(Query *match, int end)
1801
+ {
1802
+ REF(match);
1803
+ return spanfq_new_nr(match, end);
1804
+ }
1805
+
1806
+ /*****************************************************************************
1807
+ *
1808
+ * SpanOrQuery
1809
+ *
1810
+ *****************************************************************************/
1811
+
1812
+ static char *spanoq_to_s(Query *self, const char *field)
1813
+ {
1814
+ int i;
1815
+ SpanOrQuery *soq = SpOQ(self);
1816
+ char *res, *res_p;
1817
+ char **q_strs = ALLOC_N(char *, soq->c_cnt);
1818
+ int len = 50;
1819
+ for (i = 0; i < soq->c_cnt; i++) {
1820
+ Query *clause = soq->clauses[i];
1821
+ q_strs[i] = clause->to_s(clause, field);
1822
+ len += strlen(q_strs[i]) + 2;
1823
+ }
1824
+
1825
+ res_p = res = ALLOC_N(char, len);
1826
+ sprintf(res_p, "span_or[ ");
1827
+ res_p += strlen(res_p);
1828
+ for (i = 0; i < soq->c_cnt; i++) {
1829
+ sprintf(res_p, "%s, ", q_strs[i]);
1830
+ free(q_strs[i]);
1831
+ res_p += strlen(res_p);
1832
+ }
1833
+ free(q_strs);
1834
+
1835
+ sprintf(res_p - 2, " ]");
1836
+ return res;
1837
+ }
1838
+
1839
+ static void spanoq_extract_terms(Query *self, HashSet *terms)
1840
+ {
1841
+ SpanOrQuery *soq = SpOQ(self);
1842
+ int i;
1843
+ for (i = 0; i < soq->c_cnt; i++) {
1844
+ Query *clause = soq->clauses[i];
1845
+ clause->extract_terms(clause, terms);
1846
+ }
1847
+ }
1848
+
1849
+ static HashSet *spanoq_get_terms(Query *self)
1850
+ {
1851
+ SpanOrQuery *soq = SpOQ(self);
1852
+ HashSet *terms = hs_new_str(&free);
1853
+ int i;
1854
+ for (i = 0; i < soq->c_cnt; i++) {
1855
+ Query *clause = soq->clauses[i];
1856
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1857
+ hs_merge(terms, sub_terms);
1858
+ }
1859
+
1860
+ return terms;
1861
+ }
1862
+
1863
+ static SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1864
+ {
1865
+ SpanOrQuery *soq = SpOQ(self);
1866
+ if (soq->c_cnt == 1) {
1867
+ Query *q = soq->clauses[0];
1868
+ return SpQ(q)->get_spans(q, ir);
1869
+ }
1870
+
1871
+ return spanoe_new(self, ir);
1872
+ }
1873
+
1874
+ static Query *spanoq_rewrite(Query *self, IndexReader *ir)
1875
+ {
1876
+ SpanOrQuery *soq = SpOQ(self);
1877
+ int i;
1878
+
1879
+ /* replace clauses with their rewritten queries */
1880
+ for (i = 0; i < soq->c_cnt; i++) {
1881
+ Query *clause = soq->clauses[i];
1882
+ Query *rewritten = clause->rewrite(clause, ir);
1883
+ q_deref(clause);
1884
+ soq->clauses[i] = rewritten;
1885
+ }
1886
+
1887
+ self->ref_cnt++;
1888
+ return self;
1889
+ }
1890
+
1891
+ static void spanoq_destroy_i(Query *self)
1892
+ {
1893
+ SpanOrQuery *soq = SpOQ(self);
1894
+
1895
+ int i;
1896
+ for (i = 0; i < soq->c_cnt; i++) {
1897
+ Query *clause = soq->clauses[i];
1898
+ q_deref(clause);
1899
+ }
1900
+ free(soq->clauses);
1901
+ free(SpQ(self)->field);
1902
+
1903
+ spanq_destroy_i(self);
1904
+ }
1905
+
1906
+ static unsigned long spanoq_hash(Query *self)
1907
+ {
1908
+ int i;
1909
+ unsigned long hash = spanq_hash(self);
1910
+ SpanOrQuery *soq = SpOQ(self);
1911
+
1912
+ for (i = 0; i < soq->c_cnt; i++) {
1913
+ Query *q = soq->clauses[i];
1914
+ hash ^= q->hash(q);
1915
+ }
1916
+ return hash;
1917
+ }
1918
+
1919
+ static int spanoq_eq(Query *self, Query *o)
1920
+ {
1921
+ int i;
1922
+ Query *q1, *q2;
1923
+ SpanOrQuery *soq1 = SpOQ(self);
1924
+ SpanOrQuery *soq2 = SpOQ(o);
1925
+
1926
+ if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
1927
+ return false;
1928
+ }
1929
+ for (i = 0; i < soq1->c_cnt; i++) {
1930
+ q1 = soq1->clauses[i];
1931
+ q2 = soq2->clauses[i];
1932
+ if (!q1->eq(q1, q2)) {
1933
+ return false;
1934
+ }
1935
+ }
1936
+ return true;
1937
+ }
1938
+
1939
+ Query *spanoq_new()
1940
+ {
1941
+ Query *self = q_new(SpanOrQuery);
1942
+ SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1943
+ SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
1944
+
1945
+ SpQ(self)->field = estrdup((char *)EMPTY_STRING);
1946
+ SpQ(self)->get_spans = &spanoq_get_spans;
1947
+ SpQ(self)->get_terms = &spanoq_get_terms;
1948
+
1949
+ self->type = SPAN_OR_QUERY;
1950
+ self->rewrite = &spanoq_rewrite;
1951
+ self->extract_terms = &spanoq_extract_terms;
1952
+ self->to_s = &spanoq_to_s;
1953
+ self->hash = &spanoq_hash;
1954
+ self->eq = &spanoq_eq;
1955
+ self->destroy_i = &spanoq_destroy_i;
1956
+ self->create_weight_i = &spanw_new;
1957
+ self->get_matchv_i = &spanq_get_matchv_i;
1958
+
1959
+ return self;
1960
+ }
1961
+
1962
+ Query *spanoq_add_clause_nr(Query *self, Query *clause)
1963
+ {
1964
+ const int curr_index = SpOQ(self)->c_cnt++;
1965
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1966
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
1967
+ "SpanQuery.", q_get_query_name(clause->type));
1968
+ }
1969
+ if (curr_index == 0) {
1970
+ free(SpQ(self)->field);
1971
+ SpQ(self)->field = estrdup(SpQ(clause)->field);
1972
+ }
1973
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
1974
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1975
+ "Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
1976
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
1977
+ }
1978
+ if (curr_index >= SpOQ(self)->c_capa) {
1979
+ SpOQ(self)->c_capa <<= 1;
1980
+ REALLOC_N(SpOQ(self)->clauses, Query *, SpOQ(self)->c_capa);
1981
+ }
1982
+ SpOQ(self)->clauses[curr_index] = clause;
1983
+ return clause;
1984
+ }
1985
+
1986
+ Query *spanoq_add_clause(Query *self, Query *clause)
1987
+ {
1988
+ REF(clause);
1989
+ return spanoq_add_clause_nr(self, clause);
1990
+ }
1991
+
1992
+ /*****************************************************************************
1993
+ *
1994
+ * SpanNearQuery
1995
+ *
1996
+ *****************************************************************************/
1997
+
1998
+ static char *spannq_to_s(Query *self, const char *field)
1999
+ {
2000
+ int i;
2001
+ SpanNearQuery *snq = SpNQ(self);
2002
+ char *res, *res_p;
2003
+ char **q_strs = ALLOC_N(char *, snq->c_cnt);
2004
+ int len = 50;
2005
+ for (i = 0; i < snq->c_cnt; i++) {
2006
+ Query *clause = snq->clauses[i];
2007
+ q_strs[i] = clause->to_s(clause, field);
2008
+ len += strlen(q_strs[i]);
2009
+ }
2010
+
2011
+ res_p = res = ALLOC_N(char, len);
2012
+ sprintf(res_p, "span_near[ ");
2013
+ res_p += strlen(res_p);
2014
+ for (i = 0; i < snq->c_cnt; i++) {
2015
+ sprintf(res_p, "%s, ", q_strs[i]);
2016
+ free(q_strs[i]);
2017
+ res_p += strlen(res_p);
2018
+ }
2019
+ free(q_strs);
2020
+
2021
+ sprintf(res_p - 2, " ]");
2022
+ return res;
2023
+ }
2024
+
2025
+ static void spannq_extract_terms(Query *self, HashSet *terms)
2026
+ {
2027
+ SpanNearQuery *snq = SpNQ(self);
2028
+ int i;
2029
+ for (i = 0; i < snq->c_cnt; i++) {
2030
+ Query *clause = snq->clauses[i];
2031
+ clause->extract_terms(clause, terms);
2032
+ }
2033
+ }
2034
+
2035
+ static HashSet *spannq_get_terms(Query *self)
2036
+ {
2037
+ SpanNearQuery *snq = SpNQ(self);
2038
+ HashSet *terms = hs_new_str(&free);
2039
+ int i;
2040
+ for (i = 0; i < snq->c_cnt; i++) {
2041
+ Query *clause = snq->clauses[i];
2042
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
2043
+ hs_merge(terms, sub_terms);
2044
+ }
2045
+
2046
+ return terms;
2047
+ }
2048
+
2049
+ static SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
2050
+ {
2051
+ SpanNearQuery *snq = SpNQ(self);
2052
+
2053
+ if (snq->c_cnt == 1) {
2054
+ Query *q = snq->clauses[0];
2055
+ return SpQ(q)->get_spans(q, ir);
2056
+ }
2057
+
2058
+ return spanne_new(self, ir);
2059
+ }
2060
+
2061
+ static Query *spannq_rewrite(Query *self, IndexReader *ir)
2062
+ {
2063
+ SpanNearQuery *snq = SpNQ(self);
2064
+ int i;
2065
+ for (i = 0; i < snq->c_cnt; i++) {
2066
+ Query *clause = snq->clauses[i];
2067
+ Query *rewritten = clause->rewrite(clause, ir);
2068
+ q_deref(clause);
2069
+ snq->clauses[i] = rewritten;
2070
+ }
2071
+
2072
+ self->ref_cnt++;
2073
+ return self;
2074
+ }
2075
+
2076
+ static void spannq_destroy(Query *self)
2077
+ {
2078
+ SpanNearQuery *snq = SpNQ(self);
2079
+
2080
+ int i;
2081
+ for (i = 0; i < snq->c_cnt; i++) {
2082
+ Query *clause = snq->clauses[i];
2083
+ q_deref(clause);
2084
+ }
2085
+ free(snq->clauses);
2086
+ free(SpQ(self)->field);
2087
+
2088
+ spanq_destroy_i(self);
2089
+ }
2090
+
2091
+ static unsigned long spannq_hash(Query *self)
2092
+ {
2093
+ int i;
2094
+ unsigned long hash = spanq_hash(self);
2095
+ SpanNearQuery *snq = SpNQ(self);
2096
+
2097
+ for (i = 0; i < snq->c_cnt; i++) {
2098
+ Query *q = snq->clauses[i];
2099
+ hash ^= q->hash(q);
2100
+ }
2101
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
2102
+ }
2103
+
2104
+ static int spannq_eq(Query *self, Query *o)
2105
+ {
2106
+ int i;
2107
+ Query *q1, *q2;
2108
+ SpanNearQuery *snq1 = SpNQ(self);
2109
+ SpanNearQuery *snq2 = SpNQ(o);
2110
+ if (! spanq_eq(self, o)
2111
+ || (snq1->c_cnt != snq2->c_cnt)
2112
+ || (snq1->slop != snq2->slop)
2113
+ || (snq1->in_order != snq2->in_order)) {
2114
+ return false;
2115
+ }
2116
+
2117
+ for (i = 0; i < snq1->c_cnt; i++) {
2118
+ q1 = snq1->clauses[i];
2119
+ q2 = snq2->clauses[i];
2120
+ if (!q1->eq(q1, q2)) {
2121
+ return false;
2122
+ }
2123
+ }
2124
+
2125
+ return true;
2126
+ }
2127
+
2128
+ Query *spannq_new(int slop, bool in_order)
2129
+ {
2130
+ Query *self = q_new(SpanNearQuery);
2131
+
2132
+ SpNQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
2133
+ SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
2134
+ SpNQ(self)->slop = slop;
2135
+ SpNQ(self)->in_order = in_order;
2136
+
2137
+ SpQ(self)->get_spans = &spannq_get_spans;
2138
+ SpQ(self)->get_terms = &spannq_get_terms;
2139
+ SpQ(self)->field = estrdup((char *)EMPTY_STRING);
2140
+
2141
+ self->type = SPAN_NEAR_QUERY;
2142
+ self->rewrite = &spannq_rewrite;
2143
+ self->extract_terms = &spannq_extract_terms;
2144
+ self->to_s = &spannq_to_s;
2145
+ self->hash = &spannq_hash;
2146
+ self->eq = &spannq_eq;
2147
+ self->destroy_i = &spannq_destroy;
2148
+ self->create_weight_i = &spanw_new;
2149
+ self->get_matchv_i = &spanq_get_matchv_i;
2150
+
2151
+ return self;
2152
+ }
2153
+
2154
+ Query *spannq_add_clause_nr(Query *self, Query *clause)
2155
+ {
2156
+ const int curr_index = SpNQ(self)->c_cnt++;
2157
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
2158
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
2159
+ "SpanQuery.", q_get_query_name(clause->type));
2160
+ }
2161
+ if (curr_index == 0) {
2162
+ free(SpQ(self)->field);
2163
+ SpQ(self)->field = estrdup(SpQ(clause)->field);
2164
+ }
2165
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
2166
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2167
+ "Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
2168
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
2169
+ }
2170
+ if (curr_index >= SpNQ(self)->c_capa) {
2171
+ SpNQ(self)->c_capa <<= 1;
2172
+ REALLOC_N(SpNQ(self)->clauses, Query *, SpNQ(self)->c_capa);
2173
+ }
2174
+ SpNQ(self)->clauses[curr_index] = clause;
2175
+ return clause;
2176
+ }
2177
+
2178
+ Query *spannq_add_clause(Query *self, Query *clause)
2179
+ {
2180
+ REF(clause);
2181
+ return spannq_add_clause_nr(self, clause);
2182
+ }
2183
+
2184
+ /*****************************************************************************
2185
+ *
2186
+ * SpanNotQuery
2187
+ *
2188
+ *****************************************************************************/
2189
+
2190
+ static char *spanxq_to_s(Query *self, const char *field)
2191
+ {
2192
+ SpanNotQuery *sxq = SpXQ(self);
2193
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
2194
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
2195
+ char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
2196
+
2197
+ free(inc_s);
2198
+ free(exc_s);
2199
+ return res;
2200
+ }
2201
+
2202
+ static void spanxq_extract_terms(Query *self, HashSet *terms)
2203
+ {
2204
+ SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
2205
+ }
2206
+
2207
+ static HashSet *spanxq_get_terms(Query *self)
2208
+ {
2209
+ return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
2210
+ }
2211
+
2212
+ static Query *spanxq_rewrite(Query *self, IndexReader *ir)
2213
+ {
2214
+ SpanNotQuery *sxq = SpXQ(self);
2215
+ Query *q, *rq;
2216
+
2217
+ /* rewrite inclusive query */
2218
+ q = sxq->inc;
2219
+ rq = q->rewrite(q, ir);
2220
+ q_deref(q);
2221
+ sxq->inc = rq;
2222
+
2223
+ /* rewrite exclusive query */
2224
+ q = sxq->exc;
2225
+ rq = q->rewrite(q, ir);
2226
+ q_deref(q);
2227
+ sxq->exc = rq;
2228
+
2229
+ self->ref_cnt++;
2230
+ return self;
2231
+ }
2232
+
2233
+ static void spanxq_destroy(Query *self)
2234
+ {
2235
+ SpanNotQuery *sxq = SpXQ(self);
2236
+
2237
+ q_deref(sxq->inc);
2238
+ q_deref(sxq->exc);
2239
+
2240
+ free(SpQ(self)->field);
2241
+
2242
+ spanq_destroy_i(self);
2243
+ }
2244
+
2245
+ static unsigned long spanxq_hash(Query *self)
2246
+ {
2247
+ SpanNotQuery *sxq = SpXQ(self);
2248
+ return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
2249
+ ^ sxq->exc->hash(sxq->exc);
2250
+ }
2251
+
2252
+ static int spanxq_eq(Query *self, Query *o)
2253
+ {
2254
+ SpanNotQuery *sxq1 = SpXQ(self);
2255
+ SpanNotQuery *sxq2 = SpXQ(o);
2256
+ return spanq_eq(self, o) && sxq1->inc->eq(sxq1->inc, sxq2->inc)
2257
+ && sxq1->exc->eq(sxq1->exc, sxq2->exc);
2258
+ }
2259
+
2260
+
2261
+ Query *spanxq_new_nr(Query *inc, Query *exc)
2262
+ {
2263
+ Query *self;
2264
+ if (strcmp(SpQ(inc)->field, SpQ(inc)->field) != 0) {
2265
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2266
+ "Attempted to add a SpanQuery with field \"%s\" along with a "
2267
+ "SpanQuery with field \"%s\" to an SpanNotQuery",
2268
+ SpQ(inc)->field, SpQ(exc)->field);
2269
+ }
2270
+ self = q_new(SpanNotQuery);
2271
+
2272
+ SpXQ(self)->inc = inc;
2273
+ SpXQ(self)->exc = exc;
2274
+
2275
+ SpQ(self)->field = estrdup(SpQ(inc)->field);
2276
+ SpQ(self)->get_spans = &spanxe_new;
2277
+ SpQ(self)->get_terms = &spanxq_get_terms;
2278
+
2279
+ self->type = SPAN_NOT_QUERY;
2280
+ self->rewrite = &spanxq_rewrite;
2281
+ self->extract_terms = &spanxq_extract_terms;
2282
+ self->to_s = &spanxq_to_s;
2283
+ self->hash = &spanxq_hash;
2284
+ self->eq = &spanxq_eq;
2285
+ self->destroy_i = &spanxq_destroy;
2286
+ self->create_weight_i = &spanw_new;
2287
+ self->get_matchv_i = &spanq_get_matchv_i;
2288
+
2289
+ return self;
2290
+ }
2291
+
2292
+ Query *spanxq_new(Query *inc, Query *exc)
2293
+ {
2294
+ REF(inc);
2295
+ REF(exc);
2296
+ return spanxq_new_nr(inc, exc);
2297
+ }
2298
+
2299
+
2300
+ /*****************************************************************************
2301
+ *
2302
+ * Rewritables
2303
+ *
2304
+ *****************************************************************************/
2305
+
2306
+ /*****************************************************************************
2307
+ *
2308
+ * SpanPrefixQuery
2309
+ *
2310
+ *****************************************************************************/
2311
+
2312
/* Cast a generic Query pointer to SpanPrefixQuery. */
#define SpPfxQ(q) ((SpanPrefixQuery *)(q))
2313
+
2314
+ static char *spanprq_to_s(Query *self, const char *current_field)
2315
+ {
2316
+ char *buffer, *bptr;
2317
+ const char *prefix = SpPfxQ(self)->prefix;
2318
+ const char *field = SpQ(self)->field;
2319
+ size_t plen = strlen(prefix);
2320
+ size_t flen = strlen(field);
2321
+
2322
+ bptr = buffer = ALLOC_N(char, plen + flen + 35);
2323
+
2324
+ if (strcmp(field, current_field) != 0) {
2325
+ sprintf(bptr, "%s:", field);
2326
+ bptr += flen + 1;
2327
+ }
2328
+
2329
+ sprintf(bptr, "%s*", prefix);
2330
+ bptr += plen + 1;
2331
+ if (self->boost != 1.0) {
2332
+ *bptr = '^';
2333
+ dbl_to_s(++bptr, self->boost);
2334
+ }
2335
+
2336
+ return buffer;
2337
+ }
2338
+
2339
+ static Query *spanprq_rewrite(Query *self, IndexReader *ir)
2340
+ {
2341
+ const char *field = SpQ(self)->field;
2342
+ const int field_num = fis_get_field_num(ir->fis, field);
2343
+ Query *volatile q = spanmtq_new_conf(field, SpPfxQ(self)->max_terms);
2344
+ q->boost = self->boost; /* set the boost */
2345
+
2346
+ if (field_num >= 0) {
2347
+ const char *prefix = SpPfxQ(self)->prefix;
2348
+ TermEnum *te = ir->terms_from(ir, field_num, prefix);
2349
+ const char *term = te->curr_term;
2350
+ size_t prefix_len = strlen(prefix);
2351
+
2352
+ TRY
2353
+ do {
2354
+ if (strncmp(term, prefix, prefix_len) != 0) {
2355
+ break;
2356
+ }
2357
+ spanmtq_add_term(q, term); /* found a match */
2358
+ } while (te->next(te));
2359
+ XFINALLY
2360
+ te->close(te);
2361
+ XENDTRY
2362
+ }
2363
+
2364
+ return q;
2365
+ }
2366
+
2367
+ static void spanprq_destroy(Query *self)
2368
+ {
2369
+ free(SpQ(self)->field);
2370
+ free(SpPfxQ(self)->prefix);
2371
+ spanq_destroy_i(self);
2372
+ }
2373
+
2374
+ static unsigned long spanprq_hash(Query *self)
2375
+ {
2376
+ return str_hash(SpQ(self)->field) ^ str_hash(SpPfxQ(self)->prefix);
2377
+ }
2378
+
2379
+ static int spanprq_eq(Query *self, Query *o)
2380
+ {
2381
+ return (strcmp(SpPfxQ(self)->prefix, SpPfxQ(o)->prefix) == 0)
2382
+ && (strcmp(SpQ(self)->field, SpQ(o)->field) == 0);
2383
+ }
2384
+
2385
+ Query *spanprq_new(const char *field, const char *prefix)
2386
+ {
2387
+ Query *self = q_new(SpanPrefixQuery);
2388
+
2389
+ SpQ(self)->field = estrdup(field);
2390
+ SpPfxQ(self)->prefix = estrdup(prefix);
2391
+ SpPfxQ(self)->max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
2392
+
2393
+ self->type = SPAN_PREFIX_QUERY;
2394
+ self->rewrite = &spanprq_rewrite;
2395
+ self->to_s = &spanprq_to_s;
2396
+ self->hash = &spanprq_hash;
2397
+ self->eq = &spanprq_eq;
2398
+ self->destroy_i = &spanprq_destroy;
2399
+ self->create_weight_i = &q_create_weight_unsup;
2400
+
2401
+ return self;
2402
+ }