ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/q_prefix.c ADDED
@@ -0,0 +1,75 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /****************************************************************************
5
+ *
6
+ * PrefixQuery
7
+ *
8
+ ****************************************************************************/
9
+
10
+ char *prq_to_s(Query *self, char *field)
11
+ {
12
+ char *buffer, *bptr;
13
+ Term *term = (Term *)self->data;
14
+ int tlen = strlen(term->text);
15
+ int flen = strlen(term->field);
16
+ bptr = buffer = ALLOC_N(char, tlen + flen + 35);
17
+
18
+ if (strcmp(term->field, field) != 0) {
19
+ sprintf(bptr, "%s:", term->field);
20
+ bptr++;
21
+ }
22
+ sprintf(bptr, "%s*", term->text);
23
+ if (self->boost != 1.0) {
24
+ *bptr = '^';
25
+ dbl_to_s(++bptr, self->boost);
26
+ }
27
+
28
+ return buffer;
29
+ }
30
+
31
+ Query *prq_rewrite(Query *self, IndexReader *ir)
32
+ {
33
+ Term *prefix = (Term *)self->data;
34
+ TermEnum *te = ir->terms_from(ir, prefix);
35
+ char *prefix_text = prefix->text;
36
+ int prefix_length = strlen(prefix_text);
37
+ char *prefix_field = prefix->field;
38
+ Query *tq;
39
+ Query *bq = bq_create(true);
40
+
41
+ do {
42
+ TermBuffer *tb = te->tb_curr;
43
+ if (!tb || strcmp(tb->field, prefix_field) != 0 ||
44
+ strncmp(tb->text, prefix_text, prefix_length) != 0) {
45
+ break;
46
+ }
47
+ tq = tq_create(term_create(tb->field, tb->text)); // found a match
48
+ tq->boost = self->boost; // set the boost
49
+ bq_add_query(bq, tq, BC_SHOULD); // add to query
50
+ } while (te->next(te));
51
+ te->close(te);
52
+
53
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
54
+ return self->rewritten = bq;
55
+ }
56
+
57
+ void prq_destroy(void *p)
58
+ {
59
+ Query *self = (Query *)p;
60
+ if (self->destroy_all) term_destroy((Term *)self->data);
61
+ q_destroy(self);
62
+ }
63
+
64
+ Query *prefixq_create(Term *prefix)
65
+ {
66
+ Query *self = q_create();
67
+ self->data = prefix;
68
+ self->type = PREFIX_QUERY;
69
+ self->create_weight = NULL;
70
+ self->to_s = &prq_to_s;
71
+ self->rewrite = &prq_rewrite;
72
+ self->destroy = &prq_destroy;
73
+
74
+ return self;
75
+ }
data/ext/q_range.c ADDED
@@ -0,0 +1,247 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /*****************************************************************************
5
+ *
6
+ * Range
7
+ *
8
+ *****************************************************************************/
9
+
10
+ char *range_to_s(Range *range, char *field, float boost)
11
+ {
12
+ char *buffer, *bptr;
13
+ int flen, llen, ulen;
14
+
15
+ flen = strlen(range->field);
16
+ llen = range->lower_term ? strlen(range->lower_term) : 0;
17
+ ulen = range->upper_term ? strlen(range->upper_term) : 0;
18
+ buffer = ALLOC_N(char, flen + llen + ulen + 40);
19
+ bptr = buffer;
20
+
21
+ if (strcmp(field, range->field)) {
22
+ memcpy(buffer, range->field, flen * sizeof(char));
23
+ bptr += flen;
24
+ *bptr = ':';
25
+ bptr++;
26
+ }
27
+
28
+ if (range->lower_term) {
29
+ *bptr = range->include_lower ? '[' : '{';
30
+ bptr++;
31
+ memcpy(bptr, range->lower_term, llen);
32
+ bptr += llen;
33
+ } else {
34
+ *bptr = '<';
35
+ bptr++;
36
+ }
37
+
38
+ if (range->upper_term && range->lower_term) {
39
+ *bptr = ' '; bptr++;
40
+ }
41
+
42
+ if (range->upper_term) {
43
+ memcpy(bptr, range->upper_term, ulen);
44
+ bptr += ulen;
45
+ *bptr = range->include_upper ? ']' : '}';
46
+ bptr++;
47
+ } else {
48
+ *bptr = '>';
49
+ bptr++;
50
+ }
51
+
52
+ *bptr = 0;
53
+ if (boost != 1.0) {
54
+ char dbuf[32];
55
+ dbl_to_s(dbuf, boost);
56
+ sprintf(bptr, "^%s", dbuf);
57
+ }
58
+ return buffer;
59
+ }
60
+
61
+ void range_destroy(void *p)
62
+ {
63
+ Range *range = (Range *)p;
64
+ free(range->field);
65
+ if (range->lower_term) free(range->lower_term);
66
+ if (range->upper_term) free(range->upper_term);
67
+ free(range);
68
+ }
69
+
70
+ Range *range_create(const char *field, char *lower_term, char *upper_term,
71
+ bool include_lower, bool include_upper)
72
+ {
73
+ Range *range;
74
+
75
+ if (!lower_term && !upper_term)
76
+ eprintf(ARG_ERROR, "At least one value must be non-nil");
77
+ if (include_lower && !lower_term)
78
+ eprintf(ARG_ERROR, "The lower bound must be non-nil to be inclusive");
79
+ if (include_upper && !upper_term)
80
+ eprintf(ARG_ERROR, "The upper bound must be non-nil to be inclusive");
81
+ if (upper_term && lower_term && (strcmp(upper_term, lower_term) < 0))
82
+ eprintf(ARG_ERROR,
83
+ "The lower bound must less than the upper bound, %s > %s",
84
+ upper_term, upper_term);
85
+
86
+ range = ALLOC(Range);
87
+
88
+ range->field = estrdup((char *)field);
89
+ range->lower_term = lower_term ? estrdup(lower_term) : NULL;
90
+ range->upper_term = upper_term ? estrdup(upper_term) : NULL;
91
+ range->include_lower = include_lower;
92
+ range->include_upper = include_upper;
93
+ return range;
94
+ }
95
+
96
+ /***************************************************************************
97
+ *
98
+ * RangeFilter
99
+ *
100
+ ***************************************************************************/
101
+
102
+ void rfilt_destroy(void *p)
103
+ {
104
+ Filter *self = (Filter *)p;
105
+ range_destroy(self->data);
106
+ filt_destroy(self);
107
+ }
108
+
109
+ char *rfilt_to_s(Filter *self)
110
+ {
111
+ Range *range = (Range *)self->data;
112
+ return range_to_s(range, "", 1.0);
113
+ }
114
+
115
+ BitVector *rfilt_get_bv(Filter *self, IndexReader *ir)
116
+ {
117
+ BitVector *bv = bv_create_size(ir->max_doc(ir));
118
+ Range *range = (Range *)self->data;
119
+ char *field = range->field;
120
+ char *lower_term = range->lower_term ? range->lower_term : (char *)EMPTY_STRING;
121
+ char *upper_term = range->upper_term;
122
+ bool include_upper = range->include_upper;
123
+
124
+ Term *term_from = term_create(range->field, lower_term);
125
+ Term term;
126
+ TermBuffer *tb;
127
+ TermEnum* te;
128
+ TermDocEnum *tde;
129
+ bool check_lower;
130
+
131
+ te = ir->terms_from(ir, term_from);
132
+ if (te->tb_curr == NULL) {
133
+ return bv;
134
+ }
135
+
136
+ check_lower = false;
137
+ if (!range->include_lower) // make adjustments to set to exclusive
138
+ check_lower = true;
139
+
140
+ tde = ir->term_docs(ir);
141
+ tb = te->tb_curr;
142
+ term.text = tb->text;
143
+ do {
144
+ if (tb && strcmp(tb->field, field) == 0) {
145
+ if (!check_lower || lower_term == EMPTY_STRING ||
146
+ strcmp(tb->text, lower_term) > 0) {
147
+ check_lower = false;
148
+ if (upper_term) {
149
+ int compare = strcmp(upper_term, tb->text);
150
+ /* if beyond the upper term, or is exclusive and
151
+ * this is equal to the upper term, break out */
152
+ if ((compare < 0) ||
153
+ (!include_upper && compare==0)) {
154
+ break;
155
+ }
156
+ }
157
+ /* we have a good term, find the docs */
158
+ /* text is already pointing to term buffer text */
159
+ term.field = tb->field;
160
+ tde->seek(tde, &term);
161
+ while (tde->next(tde)) {
162
+ bv_set(bv, tde->doc_num(tde));
163
+ //printf("Setting %d\n", tde->doc_num(tde));
164
+ }
165
+ }
166
+ } else {
167
+ break;
168
+ }
169
+ } while (te->next(te));
170
+
171
+ tde->close(tde);
172
+ te->close(te);
173
+ term_destroy(term_from);
174
+
175
+ return bv;
176
+ }
177
+
178
+ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
179
+ bool include_lower, bool include_upper)
180
+ {
181
+ Filter *self;
182
+ Range *range = range_create(field, lower_term, upper_term,
183
+ include_lower, include_upper);
184
+
185
+ self = filt_create("RangeFilter");
186
+ self->data = range;
187
+ self->get_bv = &rfilt_get_bv;
188
+ self->to_s = &rfilt_to_s;
189
+ self->destroy = &rfilt_destroy;
190
+ return self;
191
+ }
192
+
193
+ /*****************************************************************************
194
+ *
195
+ * RangeQuery
196
+ *
197
+ *****************************************************************************/
198
+
199
+ char *rq_to_s(Query *self, char *field)
200
+ {
201
+ Range *range = (Range *)self->data;
202
+ return range_to_s(range, field, self->boost);
203
+ }
204
+
205
+ void rq_destroy(void *p)
206
+ {
207
+ Query *self = (Query *)p;
208
+ range_destroy(self->data);
209
+ q_destroy(self);
210
+ }
211
+
212
+ Query *rq_rewrite(Query *self, IndexReader *ir)
213
+ {
214
+ Range *r = (Range *)self->data;
215
+ Filter *filter = rfilt_create(r->field, r->lower_term, r->upper_term,
216
+ r->include_lower, r->include_upper);
217
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
218
+ return self->rewritten = csq_create(filter);
219
+ }
220
+
221
+ Query *rq_create_less(const char *field, char *upper_term, bool include_upper)
222
+ {
223
+ return rq_create(field, NULL, upper_term, false, include_upper);
224
+ }
225
+
226
+ Query *rq_create_more(const char *field, char *lower_term, bool include_lower)
227
+ {
228
+ return rq_create(field, lower_term, NULL, include_lower, false);
229
+ }
230
+
231
+ Query *rq_create(const char *field, char *lower_term, char *upper_term,
232
+ bool include_lower, bool include_upper)
233
+ {
234
+ Query *self;
235
+ Range *range = range_create(field, lower_term, upper_term,
236
+ include_lower, include_upper);
237
+
238
+ self = q_create();
239
+
240
+ self->type = RANGE_QUERY;
241
+ self->data = range;
242
+ self->create_weight = NULL;
243
+ self->rewrite = &rq_rewrite;
244
+ self->to_s = &rq_to_s;
245
+ self->destroy = &rq_destroy;
246
+ return self;
247
+ }
data/ext/q_span.c ADDED
@@ -0,0 +1,1566 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+
5
+ /*****************************************************************************
6
+ *
7
+ * NearSpanEnum
8
+ *
9
+ *****************************************************************************/
10
+
11
+ /*****************************************************************************
12
+ *
13
+ * SpanWeight
14
+ *
15
+ *****************************************************************************/
16
+
17
+ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
18
+ {
19
+ char *query_str = self->query->to_s(self->query, "");
20
+ Array *terms = (Array *)self->data;
21
+ char *field = ((SpanQuery *)self->query->data)->field;
22
+ char *doc_freqs = NULL;
23
+ int df_i = 0, i;
24
+ Term *t;
25
+
26
+
27
+ for (i = 0; i < terms->size; i++) {
28
+ t = (Term *)terms->elems[i];
29
+ REALLOC_N(doc_freqs, char, df_i + strlen(t->text) + 23);
30
+ sprintf(doc_freqs + df_i, "%s=%d, ", t->text, ir->doc_freq(ir, t));
31
+ df_i = strlen(doc_freqs);
32
+ }
33
+ /* remove the ',' at the end of the string if it exists */
34
+ if (terms->size > 0) {
35
+ df_i -= 2;
36
+ doc_freqs[df_i] = '\0';
37
+ } else {
38
+ doc_freqs = "";
39
+ }
40
+
41
+ Explanation *expl = expl_create(0.0,
42
+ epstrdup("weight(%s in %d), product of:",
43
+ strlen(query_str) + 20,
44
+ query_str, target));
45
+
46
+ /* We need two of these as it's included in both the query explanation
47
+ * and the field explanation */
48
+ Explanation *idf_expl1 = expl_create(self->idf,
49
+ epstrdup("idf(%s: %s)", strlen(field) + df_i, field, doc_freqs));
50
+ Explanation *idf_expl2 = expl_create(self->idf,
51
+ epstrdup("idf(%s: %s)", strlen(field) + df_i, field, doc_freqs));
52
+ if (terms->size > 0) free(doc_freqs); /* only free if allocated */
53
+
54
+ /* explain query weight */
55
+ Explanation *query_expl = expl_create(0.0,
56
+ epstrdup("query_weight(%s), product of:", strlen(query_str), query_str));
57
+
58
+ if (self->query->boost != 1.0) {
59
+ expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
60
+ }
61
+
62
+ expl_add_detail(query_expl, idf_expl1);
63
+
64
+ Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
65
+ expl_add_detail(query_expl, qnorm_expl);
66
+
67
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
68
+
69
+ expl_add_detail(expl, query_expl);
70
+
71
+ /* explain field weight */
72
+ Explanation *field_expl = expl_create(0.0,
73
+ epstrdup("field_weight(%s:%s in %d), product of:",
74
+ strlen(field) + strlen(query_str) + 20,
75
+ field, query_str, target));
76
+ free(query_str);
77
+
78
+ Scorer *scorer = self->scorer(self, ir);
79
+ Explanation *tf_expl = scorer->explain(scorer, target);
80
+ scorer->destroy(scorer);
81
+ expl_add_detail(field_expl, tf_expl);
82
+ expl_add_detail(field_expl, idf_expl2);
83
+
84
+ uchar *field_norms = ir->get_norms(ir, field);
85
+ float field_norm = (field_norms ? sim_decode_norm(self->similarity, field_norms[target]) : 0.0);
86
+ Explanation *field_norm_expl = expl_create(field_norm,
87
+ epstrdup("field_norm(field=%s, doc=%d)",
88
+ strlen(field) + 20, field, target));
89
+ expl_add_detail(field_expl, field_norm_expl);
90
+
91
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
92
+
93
+ /* combine them */
94
+ if (query_expl->value == 1.0) {
95
+ expl_destoy(expl);
96
+ return field_expl;
97
+ } else {
98
+ expl->value = (query_expl->value * field_expl->value);
99
+ expl_add_detail(expl, field_expl);
100
+ return expl;
101
+ }
102
+ }
103
+
104
+ char *spanw_to_s(Weight *self)
105
+ {
106
+ char dbuf[32];
107
+ dbl_to_s(dbuf, self->value);
108
+ return epstrdup("SpanWeight(%s)", strlen(dbuf), dbuf);
109
+ }
110
+
111
+ void spanw_destroy(void *p)
112
+ {
113
+ Weight *self = (Weight *)p;
114
+ ary_destroy(self->data);
115
+ free(p);
116
+ }
117
+
118
+ Weight *spanw_create(Query *query, Searcher *searcher)
119
+ {
120
+ Weight *self = ALLOC(Weight);
121
+ SpanQuery *spanq = (SpanQuery *)query->data;
122
+ Array *terms = spanq->get_terms(query);
123
+ ZEROSET(self, Weight, 1);
124
+ self->get_query = &w_get_query;
125
+ self->get_value = &w_get_value;
126
+ self->normalize = &w_normalize;
127
+ self->scorer = &spansc_create;
128
+ self->explain = &spanw_explain;
129
+ self->to_s = &spanw_to_s;
130
+ self->destroy = &spanw_destroy;
131
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
132
+
133
+ self->similarity = query->get_similarity(query, searcher);
134
+
135
+ self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems, terms->size, searcher);
136
+ self->query = query;
137
+ self->value = 0.0;
138
+ self->data = terms;
139
+
140
+ return self;
141
+ }
142
+
143
+
144
+ /*****************************************************************************
145
+ *
146
+ * SpanTermEnum
147
+ *
148
+ *****************************************************************************/
149
+
150
+ bool spante_next(SpanEnum *self)
151
+ {
152
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
153
+ TermDocEnum *tde = ste->positions;
154
+
155
+ if (ste->count == ste->freq) {
156
+ if (! tde->next(tde)) {
157
+ ste->doc = INT_MAX;
158
+ return false;
159
+ }
160
+ ste->doc = tde->doc_num(tde);
161
+ ste->freq = tde->freq(tde);
162
+ ste->count = 0;
163
+ }
164
+ ste->position = tde->next_position(tde);
165
+ ste->count++;
166
+ return true;
167
+ }
168
+
169
+ bool spante_skip_to(SpanEnum *self, int target)
170
+ {
171
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
172
+ TermDocEnum *tde = ste->positions;
173
+
174
+ /* are we already at the correct position? */
175
+ if (ste->doc >= target) return true;
176
+
177
+ if (! tde->skip_to(tde, target)) {
178
+ ste->doc = INT_MAX;
179
+ return false;
180
+ }
181
+
182
+ ste->doc = tde->doc_num(tde);
183
+ ste->freq = tde->freq(tde);
184
+ ste->count = 0;
185
+
186
+ ste->position = tde->next_position(tde);
187
+ ste->count++;
188
+ return true;
189
+ }
190
+
191
+ int spante_doc(SpanEnum *self)
192
+ {
193
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
194
+ return ste->doc;
195
+ }
196
+
197
+ int spante_start(SpanEnum *self)
198
+ {
199
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
200
+ return ste->position;
201
+ }
202
+
203
+ int spante_end(SpanEnum *self)
204
+ {
205
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
206
+ return ste->position + 1;
207
+ }
208
+
209
+ char *spante_to_s(SpanEnum *self)
210
+ {
211
+ char *field = ((SpanQuery *)self->query->data)->field;
212
+ char *query_str = self->query->to_s(self->query, field);
213
+ char pos_str[20];
214
+ int len = strlen(query_str), pos;
215
+ char *str = ALLOC_N(char, len + 40);
216
+
217
+ if (self->doc(self) < 0) {
218
+ sprintf(pos_str, "START");
219
+ } else {
220
+ if (self->doc(self) == INT_MAX) {
221
+ sprintf(pos_str, "END");
222
+ } else {
223
+ pos = ((SpanTermEnum *)self->data)->position;
224
+ sprintf(pos_str, "%d", self->doc(self) - pos);
225
+ }
226
+ }
227
+ sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
228
+ free(query_str);
229
+ return str;
230
+ }
231
+
232
+ void spante_destroy(void *p)
233
+ {
234
+ SpanEnum *self = (SpanEnum *)p;
235
+ SpanTermEnum *ste = (SpanTermEnum *)self->data;
236
+ TermDocEnum *tde = ste->positions;
237
+ tde->close(tde);
238
+ free(ste);
239
+ free(self);
240
+ }
241
+
242
+ SpanEnum *spante_create(Query *query, IndexReader *ir)
243
+ {
244
+ Term *term = (Term *)((SpanQuery *)query->data)->data;
245
+ SpanEnum *self = ALLOC(SpanEnum);
246
+
247
+ SpanTermEnum *ste = ALLOC(SpanTermEnum);
248
+ ste->positions = ir_term_positions_for(ir, term);
249
+ ste->position = -1;
250
+ ste->doc = -1;
251
+ ste->count = 0;
252
+ ste->freq = 0;
253
+
254
+ self->data = ste;
255
+
256
+ self->query = query;
257
+ self->next = &spante_next;
258
+ self->skip_to = &spante_skip_to;
259
+ self->doc = &spante_doc;
260
+ self->start = &spante_start;
261
+ self->end = &spante_end;
262
+ self->destroy = &spante_destroy;
263
+ self->to_s = &spante_to_s;
264
+
265
+ return self;
266
+ }
267
+
268
+
269
+ /*****************************************************************************
270
+ *
271
+ * SpanFirstEnum
272
+ *
273
+ *****************************************************************************/
274
+
275
+ bool spanfe_next(SpanEnum *self)
276
+ {
277
+ SpanEnum *se = (SpanEnum *)(self->data);
278
+ int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
279
+ while (se->next(se)) { /* scan to next match */
280
+ if (se->end(se) <= end) return true;
281
+ }
282
+ return false;
283
+ }
284
+
285
+ bool spanfe_skip_to(SpanEnum *self, int target)
286
+ {
287
+ SpanEnum *se = (SpanEnum *)(self->data);
288
+ int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
289
+
290
+ if (! se->skip_to(se, target)) return false;
291
+
292
+ if (se->end(se) <= end) /* there is a match */
293
+ return true;
294
+
295
+ return se->next(se); /* scan to next match */
296
+ }
297
+
298
+ int spanfe_doc(SpanEnum *self)
299
+ {
300
+ SpanEnum *se = (SpanEnum *)(self->data);
301
+ return se->doc(se);
302
+ }
303
+
304
+ int spanfe_start(SpanEnum *self)
305
+ {
306
+ SpanEnum *se = (SpanEnum *)(self->data);
307
+ return se->start(se);
308
+ }
309
+
310
+ int spanfe_end(SpanEnum *self)
311
+ {
312
+ SpanEnum *se = (SpanEnum *)(self->data);
313
+ return se->end(se);
314
+ }
315
+
316
+ char *spanfe_to_s(SpanEnum *self)
317
+ {
318
+ char *field = ((SpanQuery *)self->query->data)->field;
319
+ char *query_str = self->query->to_s(self->query, field);
320
+ char *res = epstrdup("SpanFirstEnum(%s)", strlen(query_str), query_str);
321
+ free(query_str);
322
+ return res;
323
+ }
324
+
325
+ void spanfe_destroy(void *p)
326
+ {
327
+ SpanEnum *self = (SpanEnum *)p;
328
+ SpanEnum *se = (SpanEnum *)self->data;
329
+ se->destroy(se);
330
+ free(self);
331
+ }
332
+
333
+ SpanEnum *spanfe_create(Query *query, IndexReader *ir)
334
+ {
335
+ SpanEnum *self = ALLOC(SpanEnum);
336
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)query->data)->data;
337
+
338
+ self->data = ((SpanQuery *)sfq->match->data)->get_spans(sfq->match, ir);
339
+
340
+ self->query = query;
341
+ self->next = &spanfe_next;
342
+ self->skip_to = &spanfe_skip_to;
343
+ self->doc = &spanfe_doc;
344
+ self->start = &spanfe_start;
345
+ self->end = &spanfe_end;
346
+ self->destroy = &spanfe_destroy;
347
+ self->to_s = &spanfe_to_s;
348
+
349
+ return self;
350
+ }
351
+
352
+
353
+ /*****************************************************************************
354
+ *
355
+ * SpanOrEnum
356
+ *
357
+ *****************************************************************************/
358
+
359
+ bool span_less_than(void *p1, void *p2)
360
+ {
361
+ SpanEnum *s1 = (SpanEnum *)p1;
362
+ SpanEnum *s2 = (SpanEnum *)p2;
363
+ int doc_diff, start_diff;
364
+ doc_diff = s1->doc(s1) - s2->doc(s2);
365
+ if (doc_diff == 0) {
366
+ start_diff = s1->start(s1) - s2->start(s2);
367
+ if (start_diff == 0) {
368
+ return s1->end(s1) < s2->end(s2);
369
+ } else {
370
+ return start_diff < 0;
371
+ }
372
+ } else {
373
+ return doc_diff < 0;
374
+ }
375
+ }
376
+
377
+ bool spanoe_next(SpanEnum *self)
378
+ {
379
+ SpanOrEnum *soe = (SpanOrEnum *)self->data;
380
+ SpanEnum *se;
381
+ int i;
382
+
383
+ if (soe->first_time) { /* first time -- initialize */
384
+ for (i = 0; i < soe->s_cnt; i++) {
385
+ se = soe->span_enums[i];
386
+ if (se->next(se)) /* move to first entry */
387
+ pq_push(soe->queue, se);
388
+ }
389
+ soe->first_time = false;
390
+ return soe->queue->count != 0;
391
+ }
392
+
393
+ if (soe->queue->count == 0) return false; /* all done */
394
+
395
+ se = (SpanEnum *)pq_top(soe->queue);
396
+ if (se->next(se)) { /* move to next */
397
+ pq_down(soe->queue);
398
+ return true;
399
+ }
400
+
401
+ pq_pop(soe->queue); /* exhausted a clause */
402
+
403
+ return soe->queue->count != 0;
404
+ }
405
+
406
+ bool spanoe_skip_to(SpanEnum *self, int target)
407
+ {
408
+ SpanOrEnum *soe = (SpanOrEnum *)self->data;
409
+ SpanEnum *se;
410
+ int i;
411
+
412
+ if (soe->first_time) { /* first time -- initialize */
413
+ for (i = 0; i < soe->s_cnt; i++) {
414
+ se = soe->span_enums[i];
415
+ if (se->skip_to(se, target)) /* move to target */
416
+ pq_push(soe->queue, se);
417
+ }
418
+ soe->first_time = false;
419
+ } else {
420
+ while ((soe->queue->count != 0) &&
421
+ ((se=(SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
422
+ if (se->skip_to(se, target)) {
423
+ pq_down(soe->queue);
424
+ } else {
425
+ pq_pop(soe->queue);
426
+ }
427
+ }
428
+ }
429
+
430
+ return soe->queue->count != 0;
431
+ }
432
+
433
+ #define GET_TOP_SOE SpanOrEnum *soe = (SpanOrEnum *)self->data;\
434
+ SpanEnum *se = (SpanEnum *)pq_top(soe->queue)
435
+ int spanoe_doc(SpanEnum *self)
436
+ {
437
+ GET_TOP_SOE;
438
+ return se->doc(se);
439
+ }
440
+
441
+ int spanoe_start(SpanEnum *self)
442
+ {
443
+ GET_TOP_SOE;
444
+ return se->start(se);
445
+ }
446
+
447
+ int spanoe_end(SpanEnum *self)
448
+ {
449
+ GET_TOP_SOE;
450
+ return se->end(se);
451
+ }
452
+
453
+ char *spanoe_to_s(SpanEnum *self)
454
+ {
455
+ SpanOrEnum *soe = (SpanOrEnum *)self->data;
456
+ char *field = ((SpanQuery *)self->query->data)->field;
457
+ char *query_str = self->query->to_s(self->query, field);
458
+ char doc_str[62];
459
+ int len = strlen(query_str);
460
+ char *str = ALLOC_N(char, len + 80);
461
+
462
+ if (soe->first_time) {
463
+ sprintf(doc_str, "START");
464
+ } else {
465
+ if (soe->queue->count == 0) {
466
+ sprintf(doc_str, "END");
467
+ } else {
468
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
469
+ self->start(self), self->end(self));
470
+ }
471
+ }
472
+ sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
473
+ free(query_str);
474
+ return str;
475
+ }
476
+
477
+ void spanoe_destroy(void *p)
478
+ {
479
+ SpanEnum *self = (SpanEnum *)p, *se;
480
+ SpanOrEnum *soe = (SpanOrEnum *)self->data;
481
+ int i;
482
+ pq_destroy(soe->queue);
483
+ for (i = 0; i < soe->s_cnt; i++) {
484
+ se = soe->span_enums[i];
485
+ se->destroy(se);
486
+ }
487
+ free(soe->span_enums);
488
+ free(soe);
489
+ free(self);
490
+ }
491
+
492
+ SpanEnum *spanoe_create(Query *query, IndexReader *ir)
493
+ {
494
+ Query *clause;
495
+ SpanEnum *self = ALLOC(SpanEnum);
496
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)query->data)->data;
497
+ SpanOrEnum *soe = ALLOC(SpanOrEnum);
498
+ int i;
499
+ soe->first_time = true;
500
+ soe->s_cnt = soq->c_cnt;
501
+ soe->span_enums = ALLOC_N(SpanEnum *, soe->s_cnt);
502
+ for (i = 0; i < soe->s_cnt; i++) {
503
+ clause = soq->clauses[i];
504
+ soe->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
505
+ }
506
+
507
+ soe->queue = pq_create(soe->s_cnt, &span_less_than);
508
+
509
+ self->data = soe;
510
+
511
+ self->query = query;
512
+ self->next = &spanoe_next;
513
+ self->skip_to = &spanoe_skip_to;
514
+ self->doc = &spanoe_doc;
515
+ self->start = &spanoe_start;
516
+ self->end = &spanoe_end;
517
+ self->destroy = &spanoe_destroy;
518
+ self->to_s = &spanoe_to_s;
519
+
520
+ return self;
521
+ }
522
+
523
+ /*****************************************************************************
524
+ *
525
+ * SpanNearEnum
526
+ *
527
+ *****************************************************************************/
528
+
529
+ #define SNE_NEXT() do {\
530
+ sne->current = (sne->current+1) % sne->s_cnt;\
531
+ se = sne->span_enums[sne->current];\
532
+ } while (0);
533
+
534
+ bool sne_init(SpanNearEnum *sne)
535
+ {
536
+ SpanEnum *se = sne->span_enums[sne->current];
537
+ int prev_doc = se->doc(se);
538
+ int i;
539
+
540
+ for (i = 1; i < sne->s_cnt; i++) {
541
+ SNE_NEXT();
542
+ if (!se->skip_to(se, prev_doc)) return false;
543
+ prev_doc = se->doc(se);
544
+ }
545
+ return true;
546
+ }
547
+
548
+ bool sne_goto_next_doc(SpanNearEnum *sne)
549
+ {
550
+ SpanEnum *se = sne->span_enums[sne->current];
551
+ int prev_doc = se->doc(se);
552
+
553
+ SNE_NEXT();
554
+
555
+ while (se->doc(se) < prev_doc) {
556
+ if (! se->skip_to(se, prev_doc)) return false;
557
+ prev_doc = se->doc(se);
558
+ SNE_NEXT();
559
+ }
560
+ return true;
561
+ }
562
+
563
+ bool sne_next_unordered_match(SpanEnum *self)
564
+ {
565
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
566
+ SpanEnum *se, *min_se = NULL;
567
+ int i;
568
+ int max_end, end, min_start, start, doc;
569
+ int lengths_sum;
570
+
571
+ while (true) {
572
+
573
+ max_end = 0;
574
+ min_start = INT_MAX;
575
+ lengths_sum = 0;
576
+
577
+ for (i = 0; i < sne->s_cnt; i++) {
578
+ se = sne->span_enums[i];
579
+ if ((end=se->end(se)) > max_end) max_end = end;
580
+ if ((start=se->start(se)) < min_start) {
581
+ min_start = start;
582
+ min_se = se;
583
+ sne->current = i; /* current should point to the minimum span */
584
+ }
585
+ lengths_sum += end - start;
586
+ }
587
+
588
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
589
+ /* we have a match */
590
+ sne->start = min_start;
591
+ sne->end = max_end;
592
+ sne->doc = min_se->doc(min_se);
593
+ return true;
594
+ }
595
+
596
+ /* increment the minimum span_enum and try again */
597
+ doc = min_se->doc(min_se);
598
+ if (!min_se->next(min_se)) return false;
599
+ if (doc < min_se->doc(min_se)) {
600
+ if (!sne_goto_next_doc(sne)) return false;
601
+ }
602
+ }
603
+ }
604
+
605
+ bool sne_next_ordered_match(SpanEnum *self)
606
+ {
607
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
608
+ SpanEnum *se;
609
+ int i;
610
+ int prev_doc, prev_start, prev_end;
611
+ int doc=0, start=0, end=0;
612
+ int lengths_sum;
613
+
614
+ while (true) {
615
+ se = sne->span_enums[0];
616
+
617
+ prev_doc = se->doc(se);
618
+ sne->start = prev_start = se->start(se);
619
+ prev_end = se->end(se);
620
+
621
+ i = 1;
622
+ lengths_sum = prev_end - prev_start;
623
+
624
+ while (i < sne->s_cnt) {
625
+ se = sne->span_enums[i];
626
+ doc = se->doc(se);
627
+ start = se->start(se);
628
+ end = se->end(se);
629
+ while ((doc == prev_doc) && ((start < prev_start) ||
630
+ ((start == prev_start) && (end < prev_end)))) {
631
+ if (!se->next(se)) return false;
632
+ doc = se->doc(se);
633
+ start = se->start(se);
634
+ end = se->end(se);
635
+ }
636
+ if (doc != prev_doc) {
637
+ sne->current = i;
638
+ if (!sne_goto_next_doc(sne)) return false;
639
+ break;
640
+ }
641
+ i++;
642
+ lengths_sum += end - start;
643
+ prev_doc = doc;
644
+ prev_start = start;
645
+ prev_end = end;
646
+ }
647
+ if (i == sne->s_cnt) {
648
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
649
+ /* we have a match */
650
+ sne->end = end;
651
+ sne->doc = doc;
652
+
653
+ /* the minimum span is always the first span so it needs to be
654
+ * incremented next time around */
655
+ sne->current = 0;
656
+ return true;
657
+
658
+ } else {
659
+ se = sne->span_enums[0];
660
+ if (!se->next(se)) return false;
661
+ if (se->doc(se) != prev_doc) {
662
+ sne->current = 0;
663
+ if (!sne_goto_next_doc(sne)) return false;
664
+ }
665
+ }
666
+ }
667
+ }
668
+ }
669
+
670
+ bool sne_next_match(SpanEnum *self)
671
+ {
672
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
673
+ SpanEnum *se_curr, *se_next;
674
+
675
+ if (!sne->first_time) {
676
+ if (!sne_init(sne)) return false;
677
+ sne->first_time = false;
678
+ }
679
+ se_curr = sne->span_enums[sne->current];
680
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
681
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
682
+ if (!sne_goto_next_doc(sne)) return false;
683
+ }
684
+
685
+ if (sne->in_order) {
686
+ return sne_next_ordered_match(self);
687
+ } else {
688
+ return sne_next_unordered_match(self);
689
+ }
690
+ }
691
+
692
+ bool spanne_next(SpanEnum *self)
693
+ {
694
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
695
+ SpanEnum *se;
696
+
697
+ se = sne->span_enums[sne->current];
698
+ if (!se->next(se)) return false;
699
+
700
+ return sne_next_match(self);
701
+ }
702
+
703
+ bool spanne_skip_to(SpanEnum *self, int target)
704
+ {
705
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
706
+ SpanEnum *se;
707
+
708
+ se = sne->span_enums[sne->current];
709
+ if (!se->skip_to(se, target)) return false;
710
+
711
+ return sne_next_match(self);
712
+ }
713
+
714
/*
 * Declare a local `sne` pointing at the SpanNearEnum stored in self->data.
 * Expects a variable named `self` in scope. No trailing semicolon is baked
 * in; callers write `GET_TOP_SNE;` like any other declaration.
 */
#define GET_TOP_SNE SpanNearEnum *sne = (SpanNearEnum *)self->data
715
+
716
+ int spanne_doc(SpanEnum *self)
717
+ {
718
+ GET_TOP_SNE;
719
+ return sne->doc;
720
+ }
721
+
722
+ int spanne_start(SpanEnum *self)
723
+ {
724
+ GET_TOP_SNE;
725
+ return sne->start;
726
+ }
727
+
728
+ int spanne_end(SpanEnum *self)
729
+ {
730
+ GET_TOP_SNE;
731
+ return sne->end;
732
+ }
733
+
734
+ char *spanne_to_s(SpanEnum *self)
735
+ {
736
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
737
+ char *field = ((SpanQuery *)self->query->data)->field;
738
+ char *query_str = self->query->to_s(self->query, field);
739
+ char doc_str[62];
740
+ int len = strlen(query_str);
741
+ char *str = ALLOC_N(char, len + 80);
742
+
743
+ if (sne->first_time) {
744
+ sprintf(doc_str, "START");
745
+ } else {
746
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
747
+ self->start(self), self->end(self));
748
+ }
749
+ sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
750
+ free(query_str);
751
+ return str;
752
+ }
753
+
754
+ void spanne_destroy(void *p)
755
+ {
756
+ SpanEnum *self = (SpanEnum *)p, *se;
757
+ SpanNearEnum *sne = (SpanNearEnum *)self->data;
758
+ int i;
759
+ for (i = 0; i < sne->s_cnt; i++) {
760
+ se = sne->span_enums[i];
761
+ se->destroy(se);
762
+ }
763
+ free(sne->span_enums);
764
+ free(sne);
765
+ free(self);
766
+ }
767
+
768
+ SpanEnum *spanne_create(Query *query, IndexReader *ir)
769
+ {
770
+ Query *clause;
771
+ SpanEnum *self = ALLOC(SpanEnum);
772
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)query->data)->data;
773
+ SpanNearEnum *sne = ALLOC(SpanNearEnum);
774
+ int i;
775
+ sne->first_time = true;
776
+ sne->in_order = snq->in_order;
777
+ sne->slop = snq->slop;
778
+ sne->s_cnt = snq->c_cnt;
779
+ sne->span_enums = ALLOC_N(SpanEnum *, sne->s_cnt);
780
+ for (i = 0; i < sne->s_cnt; i++) {
781
+ clause = snq->clauses[i];
782
+ sne->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
783
+ }
784
+ sne->current = 0;
785
+
786
+ sne->doc = -1;
787
+ sne->start = -1;
788
+ sne->end = -1;
789
+
790
+ self->data = sne;
791
+
792
+ self->query = query;
793
+ self->next = &spanne_next;
794
+ self->skip_to = &spanne_skip_to;
795
+ self->doc = &spanne_doc;
796
+ self->start = &spanne_start;
797
+ self->end = &spanne_end;
798
+ self->destroy = &spanne_destroy;
799
+ self->to_s = &spanne_to_s;
800
+
801
+ return self;
802
+ }
803
+
804
+ /*****************************************************************************
805
+ *
806
+ * SpanNotEnum
807
+ *
808
+ *****************************************************************************/
809
+
810
+ bool spanxe_next(SpanEnum *self)
811
+ {
812
+ SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
813
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
814
+ if (sxe->more_inc) { // move to next incl
815
+ sxe->more_inc = inc->next(inc);
816
+ }
817
+
818
+ while (sxe->more_inc && sxe->more_exc) {
819
+ if (inc->doc(inc) > exc->doc(exc)) { // skip excl
820
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
821
+ }
822
+
823
+ while (sxe->more_exc && // while excl is before
824
+ (inc->doc(inc) == exc->doc(exc)) &&
825
+ (exc->end(exc) <= inc->start(inc))) {
826
+ sxe->more_exc = exc->next(exc); // increment excl
827
+ }
828
+
829
+ if (! sxe->more_exc || // if no intersection
830
+ (inc->doc(inc) != exc->doc(exc)) ||
831
+ inc->end(inc) <= exc->start(exc)) {
832
+ break; // we found a match
833
+ }
834
+
835
+ sxe->more_inc = inc->next(inc); // intersected: keep scanning
836
+ }
837
+ return sxe->more_inc;
838
+ }
839
+
840
+ bool spanxe_skip_to(SpanEnum *self, int target)
841
+ {
842
+ SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
843
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
844
+ int doc;
845
+
846
+ if (sxe->more_inc) { // move to next incl
847
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
848
+ }
849
+
850
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
851
+ sxe->more_exc = exc->skip_to(exc, doc);
852
+ }
853
+
854
+ while (sxe->more_exc && // while excl is before
855
+ inc->doc(inc) == exc->doc(exc) &&
856
+ exc->end(exc) <= inc->start(inc)) {
857
+ sxe->more_exc = exc->next(exc); // increment excl
858
+ }
859
+
860
+ if (!sxe->more_exc || // if no intersection
861
+ inc->doc(inc) != exc->doc(exc) ||
862
+ inc->end(inc) <= exc->start(exc)) {
863
+ return true; // we found a match
864
+ }
865
+
866
+ return spanxe_next(self); // scan to next match
867
+ }
868
+
869
+ int spanxe_doc(SpanEnum *self)
870
+ {
871
+ SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
872
+ return inc->doc(inc);
873
+ }
874
+
875
+ int spanxe_start(SpanEnum *self)
876
+ {
877
+ SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
878
+ return inc->start(inc);
879
+ }
880
+
881
+ int spanxe_end(SpanEnum *self)
882
+ {
883
+ SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
884
+ return inc->end(inc);
885
+ }
886
+
887
+ char *spanxe_to_s(SpanEnum *self)
888
+ {
889
+ char *field = ((SpanQuery *)self->query->data)->field;
890
+ char *query_str = self->query->to_s(self->query, field);
891
+ char *res = epstrdup("SpanNotEnum(%s)", strlen(query_str), query_str);
892
+ free(query_str);
893
+ return res;
894
+ }
895
+
896
+ void spanxe_destroy(void *p)
897
+ {
898
+ SpanEnum *self = (SpanEnum *)p;
899
+ SpanNotEnum *sxe = (SpanNotEnum *)self->data;
900
+ sxe->inc->destroy(sxe->inc);
901
+ sxe->exc->destroy(sxe->exc);
902
+ free(sxe);
903
+ free(self);
904
+ }
905
+
906
+ SpanEnum *spanxe_create(Query *query, IndexReader *ir)
907
+ {
908
+ SpanEnum *self = ALLOC(SpanEnum);
909
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)query->data)->data;
910
+
911
+ SpanNotEnum *sxe = self->data = ALLOC(SpanNotEnum);
912
+ sxe->inc = ((SpanQuery *)sxq->inc->data)->get_spans(sxq->inc, ir);
913
+ sxe->exc = ((SpanQuery *)sxq->exc->data)->get_spans(sxq->exc, ir);
914
+ sxe->more_inc = true;
915
+ sxe->more_exc = sxe->exc->next(sxe->exc);
916
+
917
+ self->query = query;
918
+ self->next = &spanxe_next;
919
+ self->skip_to = &spanxe_skip_to;
920
+ self->doc = &spanxe_doc;
921
+ self->start = &spanxe_start;
922
+ self->end = &spanxe_end;
923
+ self->destroy = &spanxe_destroy;
924
+ self->to_s = &spanxe_to_s;
925
+
926
+ return self;
927
+ }
928
+ /*****************************************************************************
929
+ *
930
+ * SpanQuery
931
+ *
932
+ *****************************************************************************/
933
+
934
+ void spanq_destroy(void *p)
935
+ {
936
+ Query *self = (Query *)p;
937
+ SpanQuery *sq = (SpanQuery *)self->data;
938
+ free(sq);
939
+ q_destroy(self);
940
+ }
941
+
942
+ /*****************************************************************************
943
+ *
944
+ * SpanTermQuery
945
+ *
946
+ *****************************************************************************/
947
+
948
+ char *spantq_to_s(Query *self, char *field)
949
+ {
950
+ Term *term = (Term *)((SpanQuery *)self->data)->data;
951
+ char *term_str, *res;
952
+ if (field == term->field) {
953
+ term_str = estrdup(term->text);
954
+ } else {
955
+ term_str = term_to_s(term);
956
+ }
957
+ res = epstrdup("span_term(%s)", strlen(term_str), term_str);
958
+ free(term_str);
959
+ return res;
960
+ }
961
+
962
+ void spantq_destroy(void *p)
963
+ {
964
+ Query *self = (Query *)p;
965
+ SpanQuery *sq = (SpanQuery *)self->data;
966
+ if (self->destroy_all) {
967
+ Term *term = (Term *)sq->data;
968
+ term_destroy(term);
969
+ }
970
+ free(sq);
971
+ q_destroy(self);
972
+ }
973
+
974
+ void spantq_extract_terms(Query *self, Array *terms)
975
+ {
976
+ Term *term = (Term *)((SpanQuery *)self->data)->data;
977
+ ary_append(terms, term);
978
+ }
979
+
980
+ Array *spantq_get_terms(Query *self)
981
+ {
982
+ Term *term = (Term *)((SpanQuery *)self->data)->data;
983
+ Array *terms = ary_create(1, &term_destroy);
984
+ ary_append(terms, term_clone(term));
985
+ return terms;
986
+ }
987
+
988
+ Query *spantq_create(Term *term)
989
+ {
990
+ Query *self = q_create();
991
+ SpanQuery *sq = ALLOC(SpanQuery);
992
+ sq->data = term;
993
+
994
+ sq->get_spans = &spante_create;
995
+ sq->get_terms = &spantq_get_terms;
996
+ sq->field = term->field;
997
+
998
+ self->type = SPAN_TERM_QUERY;
999
+ self->data = sq;
1000
+ self->create_weight = &spanw_create;
1001
+ self->extract_terms = &spantq_extract_terms;
1002
+ self->to_s = &spantq_to_s;
1003
+ self->destroy = &spantq_destroy;
1004
+ return self;
1005
+ }
1006
+
1007
+ /*****************************************************************************
1008
+ *
1009
+ * SpanFirstQuery
1010
+ *
1011
+ *****************************************************************************/
1012
+
1013
+ char *spanfq_to_s(Query *self, char *field)
1014
+ {
1015
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1016
+ Query *match = sfq->match;
1017
+ char *q_str = match->to_s(match, field);
1018
+ char *res = epstrdup("span_first(%s, %d)", strlen(q_str) + 20, q_str, sfq->end);
1019
+ free(q_str);
1020
+ return res;
1021
+ }
1022
+
1023
+ void spanfq_extract_terms(Query *self, Array *terms)
1024
+ {
1025
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1026
+ sfq->match->extract_terms(sfq->match, terms);
1027
+ }
1028
+
1029
+ Array *spanfq_get_terms(Query *self)
1030
+ {
1031
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1032
+ SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
1033
+ return match_sq->get_terms(sfq->match);
1034
+ }
1035
+
1036
+ Query *spanfq_rewrite(Query *self, IndexReader *ir)
1037
+ {
1038
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1039
+ Query *clone = NULL;
1040
+ Query *rewritten = sfq->match->rewrite(sfq->match, ir);
1041
+ if (rewritten != sfq->match) {
1042
+ clone = spanfq_create(rewritten, sfq->end);
1043
+ sfq->match->rewritten = NULL; /* it will get destroyed with the clone */
1044
+ }
1045
+
1046
+ if (clone != NULL) {
1047
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
1048
+ return self->rewritten = clone; /* some clauses rewrote */
1049
+ } else {
1050
+ return self; /* no clauses rewrote */
1051
+ }
1052
+ }
1053
+
1054
+ void spanfq_destroy(void *p)
1055
+ {
1056
+ Query *self = (Query *)p;
1057
+ SpanQuery *sq = (SpanQuery *)self->data;
1058
+ SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
1059
+ if (self->destroy_all) sfq->match->destroy(sfq->match);
1060
+ free(sfq);
1061
+ free(sq);
1062
+ q_destroy(self);
1063
+ }
1064
+
1065
+ Query *spanfq_create(Query *match, int end)
1066
+ {
1067
+ Query *self = q_create();
1068
+ SpanQuery *sq = ALLOC(SpanQuery);
1069
+ SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
1070
+ sfq->match = match;
1071
+ sfq->end = end;
1072
+ sq->data = sfq;
1073
+
1074
+ sq->get_spans = &spanfe_create;
1075
+ sq->get_terms = &spanfq_get_terms;
1076
+ sq->field = ((SpanQuery *)match->data)->field;
1077
+
1078
+ self->type = SPAN_FIRST_QUERY;
1079
+ self->data = sq;
1080
+ self->create_weight = &spanw_create;
1081
+ self->extract_terms = &spanfq_extract_terms;
1082
+ self->rewrite = &spanfq_rewrite;
1083
+ self->to_s = &spanfq_to_s;
1084
+ self->destroy = &spanfq_destroy;
1085
+ return self;
1086
+ }
1087
+
1088
+ /*****************************************************************************
1089
+ *
1090
+ * SpanOrQuery
1091
+ *
1092
+ *****************************************************************************/
1093
+
1094
+ char *spanoq_to_s(Query *self, char *field)
1095
+ {
1096
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1097
+ char *res = estrdup("span_or["), *q_str;
1098
+ Query *clause;
1099
+ int i;
1100
+ for (i = 0; i < soq->c_cnt; i++) {
1101
+ clause = soq->clauses[i];
1102
+ q_str = clause->to_s(clause, field);
1103
+ REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1104
+ if (i > 0) strcat(res, ", ");
1105
+ strcat(res, q_str);
1106
+ free(q_str);
1107
+ }
1108
+ strcat(res, "]");
1109
+
1110
+ return res;
1111
+ }
1112
+
1113
+ void spanoq_extract_terms(Query *self, Array *terms)
1114
+ {
1115
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1116
+ Query *clause;
1117
+ int i;
1118
+ for (i = 0; i < soq->c_cnt; i++) {
1119
+ clause = soq->clauses[i];
1120
+ clause->extract_terms(clause, terms);
1121
+ }
1122
+ }
1123
+
1124
+ Array *spanoq_get_terms(Query *self)
1125
+ {
1126
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1127
+ Array *terms = ary_create(soq->c_cnt, NULL);
1128
+ Query *clause;
1129
+ int i;
1130
+ for (i = 0; i < soq->c_cnt; i++) {
1131
+ clause = soq->clauses[i];
1132
+ clause->extract_terms(clause, terms);
1133
+ }
1134
+
1135
+ return terms;
1136
+ }
1137
+
1138
+ SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1139
+ {
1140
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1141
+ Query *q;
1142
+ if (soq->c_cnt == 1) {
1143
+ q = soq->clauses[0];
1144
+ return ((SpanQuery *)q->data)->get_spans(q, ir);
1145
+ }
1146
+
1147
+ return spanoe_create(self, ir);
1148
+ }
1149
+
1150
+ Query *spanoq_rewrite(Query *self, IndexReader *ir)
1151
+ {
1152
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1153
+ Query *clone = NULL;
1154
+
1155
+ Query *clause, *rewritten;
1156
+ Query **new_clauses = ALLOC_N(Query *, soq->c_cnt);
1157
+ int i;
1158
+ for (i = 0; i < soq->c_cnt; i++) {
1159
+ clause = soq->clauses[i];
1160
+ rewritten = clause->rewrite(clause, ir);
1161
+ if ((clause != rewritten) && (clone == NULL)) {
1162
+ clone = spanoq_create(new_clauses, soq->c_cnt);
1163
+ /* The sub-clauses will be handled by the original query */
1164
+ clone->destroy_all = false;
1165
+ }
1166
+ new_clauses[i] = rewritten;
1167
+ }
1168
+
1169
+ if (clone != NULL) {
1170
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
1171
+ return self->rewritten = clone; /* some clauses rewrote */
1172
+ } else {
1173
+ free(new_clauses); /* no clauses rewrote */
1174
+ return self;
1175
+ }
1176
+ }
1177
+
1178
+ void spanoq_destroy(void *p)
1179
+ {
1180
+ Query *self = (Query *)p;
1181
+ SpanQuery *sq = (SpanQuery *)self->data;
1182
+ SpanOrQuery *soq = (SpanOrQuery *)sq->data;
1183
+
1184
+ if (self->destroy_all) {
1185
+ Query *clause;
1186
+ int i;
1187
+ for (i = 0; i < soq->c_cnt; i++) {
1188
+ clause = soq->clauses[i];
1189
+ clause->destroy(clause);
1190
+ }
1191
+ free(soq->clauses);
1192
+ }
1193
+
1194
+
1195
+ free(soq);
1196
+ free(sq);
1197
+ q_destroy(self);
1198
+ }
1199
+
1200
+ Query *spanoq_create(Query **clauses, int c_cnt)
1201
+ {
1202
+ Query *self = q_create();
1203
+ SpanQuery *sq = ALLOC(SpanQuery);
1204
+ SpanOrQuery *soq = ALLOC(SpanOrQuery);
1205
+ soq->clauses = clauses;
1206
+ soq->c_cnt = c_cnt;
1207
+ sq->data = soq;
1208
+
1209
+ sq->get_spans = &spanoq_get_spans;
1210
+ sq->get_terms = &spanoq_get_terms;
1211
+ sq->field = ((SpanQuery *)clauses[0]->data)->field;
1212
+
1213
+ self->type = SPAN_OR_QUERY;
1214
+ self->data = sq;
1215
+ self->create_weight = &spanw_create;
1216
+ self->extract_terms = &spanoq_extract_terms;
1217
+ self->rewrite = &spanoq_rewrite;
1218
+ self->to_s = &spanoq_to_s;
1219
+ self->destroy = &spanoq_destroy;
1220
+ return self;
1221
+ }
1222
+
1223
+ /*****************************************************************************
1224
+ *
1225
+ * SpanNearQuery
1226
+ *
1227
+ *****************************************************************************/
1228
+
1229
+ char *spannq_to_s(Query *self, char *field)
1230
+ {
1231
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1232
+ char *res = estrdup("span_near(["), *q_str;
1233
+ Query *clause;
1234
+ int i;
1235
+ for (i = 0; i < snq->c_cnt; i++) {
1236
+ clause = snq->clauses[i];
1237
+ q_str = clause->to_s(clause, field);
1238
+ REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1239
+ if (i > 0) strcat(res, ", ");
1240
+ strcat(res, q_str);
1241
+ free(q_str);
1242
+ }
1243
+ REALLOC_N(res, char, strlen(res) + 40);
1244
+ sprintf(res + strlen(res), "], %d, %s)", snq->slop,
1245
+ snq->in_order ? "Ordered" : "Unordered");
1246
+
1247
+ return res;
1248
+ }
1249
+
1250
+ void spannq_extract_terms(Query *self, Array *terms)
1251
+ {
1252
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1253
+ Query *clause;
1254
+ int i;
1255
+ for (i = 0; i < snq->c_cnt; i++) {
1256
+ clause = snq->clauses[i];
1257
+ clause->extract_terms(clause, terms);
1258
+ }
1259
+ }
1260
+
1261
+ Array *spannq_get_terms(Query *self)
1262
+ {
1263
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1264
+ Array *terms = ary_create(snq->c_cnt, NULL);
1265
+ Query *clause;
1266
+ int i;
1267
+ for (i = 0; i < snq->c_cnt; i++) {
1268
+ clause = snq->clauses[i];
1269
+ clause->extract_terms(clause, terms);
1270
+ }
1271
+
1272
+ return terms;
1273
+ }
1274
+
1275
+ SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1276
+ {
1277
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1278
+ Query *q;
1279
+
1280
+ if (snq->c_cnt == 1) {
1281
+ q = snq->clauses[0];
1282
+ return ((SpanQuery *)q->data)->get_spans(q, ir);
1283
+ }
1284
+
1285
+ return spanne_create(self, ir);
1286
+ }
1287
+
1288
+ Query *spannq_rewrite(Query *self, IndexReader *ir)
1289
+ {
1290
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1291
+ Query *clone = NULL;
1292
+
1293
+ Query *clause, *rewritten;
1294
+ Query **new_clauses = ALLOC_N(Query *, snq->c_cnt);
1295
+ int i;
1296
+ for (i = 0; i < snq->c_cnt; i++) {
1297
+ clause = snq->clauses[i];
1298
+ rewritten = clause->rewrite(clause, ir);
1299
+ if ((clause != rewritten) && (clone == NULL)) {
1300
+ clone = spannq_create(new_clauses, snq->c_cnt, snq->slop, snq->in_order);
1301
+ /* The sub-clauses will be handled by the original query */
1302
+ clone->destroy_all = false;
1303
+ }
1304
+ new_clauses[i] = rewritten;
1305
+ }
1306
+
1307
+ if (clone != NULL) {
1308
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
1309
+ return self->rewritten = clone; /* some clauses rewrote */
1310
+ } else {
1311
+ free(new_clauses); /* no clauses rewrote */
1312
+ return self;
1313
+ }
1314
+ }
1315
+
1316
+ void spannq_destroy(void *p)
1317
+ {
1318
+ Query *self = (Query *)p;
1319
+ SpanQuery *sq = (SpanQuery *)self->data;
1320
+ SpanNearQuery *snq = (SpanNearQuery *)sq->data;
1321
+
1322
+ if (self->destroy_all) {
1323
+ Query *clause;
1324
+ int i;
1325
+ for (i = 0; i < snq->c_cnt; i++) {
1326
+ clause = snq->clauses[i];
1327
+ clause->destroy(clause);
1328
+ }
1329
+ free(snq->clauses);
1330
+ }
1331
+
1332
+
1333
+ free(snq);
1334
+ free(sq);
1335
+ q_destroy(self);
1336
+ }
1337
+
1338
+ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1339
+ {
1340
+ Query *self = q_create();
1341
+ SpanQuery *sq = ALLOC(SpanQuery);
1342
+ SpanNearQuery *snq = ALLOC(SpanNearQuery);
1343
+ snq->clauses = clauses;
1344
+ snq->c_cnt = c_cnt;
1345
+ snq->slop = slop;
1346
+ snq->in_order = in_order;
1347
+ sq->data = snq;
1348
+
1349
+ sq->get_spans = &spannq_get_spans;
1350
+ sq->get_terms = &spannq_get_terms;
1351
+
1352
+ sq->field = ((SpanQuery *)clauses[0]->data)->field;
1353
+
1354
+ self->type = SPAN_NEAR_QUERY;
1355
+ self->data = sq;
1356
+ self->create_weight = &spanw_create;
1357
+ self->extract_terms = &spannq_extract_terms;
1358
+ self->rewrite = &spannq_rewrite;
1359
+ self->to_s = &spannq_to_s;
1360
+ self->destroy = &spannq_destroy;
1361
+ return self;
1362
+ }
1363
+
1364
+ /*****************************************************************************
1365
+ *
1366
+ * SpanNotQuery
1367
+ *
1368
+ *****************************************************************************/
1369
+
1370
+ char *spanxq_to_s(Query *self, char *field)
1371
+ {
1372
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1373
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
1374
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
1375
+ char *res = epstrdup("span_not(inc:<%s>, exc:<%s>)",
1376
+ strlen(inc_s) + strlen(exc_s), inc_s, exc_s);
1377
+
1378
+ free(inc_s);
1379
+ free(exc_s);
1380
+ return res;
1381
+ }
1382
+
1383
+ void spanxq_extract_terms(Query *self, Array *terms)
1384
+ {
1385
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1386
+ sxq->inc->extract_terms(sxq->inc, terms);
1387
+ }
1388
+
1389
+ Array *spanxq_get_terms(Query *self)
1390
+ {
1391
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1392
+ Array *terms = ary_create(1, NULL);
1393
+ sxq->inc->extract_terms(sxq->inc, terms);
1394
+ return terms;
1395
+ }
1396
+
1397
+ Query *spanxq_rewrite(Query *self, IndexReader *ir)
1398
+ {
1399
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1400
+ Query *clone = NULL;
1401
+ Query *inc, *exc;
1402
+ Query *inc_rewritten, *exc_rewritten;
1403
+
1404
+ inc = sxq->inc;
1405
+ inc_rewritten = inc->rewrite(inc, ir);
1406
+ exc = sxq->exc;
1407
+ exc_rewritten = exc->rewrite(exc, ir);
1408
+ if ((inc_rewritten != inc) || (exc_rewritten != exc)) {
1409
+ clone = spanxq_create(inc_rewritten, exc_rewritten);
1410
+ /* The sub-clauses will be handled by the original query */
1411
+ clone->destroy_all = false;
1412
+ }
1413
+
1414
+ if (clone != NULL) {
1415
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
1416
+ return self->rewritten = clone; /* some clauses rewrote */
1417
+ } else {
1418
+ return self; /* no clauses rewrote */
1419
+ }
1420
+ }
1421
+
1422
+ void spanxq_destroy(void *p)
1423
+ {
1424
+ Query *self = (Query *)p;
1425
+ SpanQuery *sq = (SpanQuery *)self->data;
1426
+ SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
1427
+
1428
+ if (self->destroy_all) {
1429
+ sxq->inc->destroy(sxq->inc);
1430
+ sxq->exc->destroy(sxq->exc);
1431
+ }
1432
+
1433
+ free(sxq);
1434
+ free(sq);
1435
+ q_destroy(self);
1436
+ }
1437
+
1438
+ Query *spanxq_create(Query *inc, Query *exc)
1439
+ {
1440
+ Query *self = q_create();
1441
+ SpanQuery *sq = ALLOC(SpanQuery);
1442
+ SpanNotQuery *sxq = ALLOC(SpanNotQuery);
1443
+ sxq->inc = inc;
1444
+ sxq->exc = exc;
1445
+ sq->data = sxq;
1446
+
1447
+ sq->get_spans = &spanxe_create;
1448
+ sq->get_terms = &spanxq_get_terms;
1449
+ sq->field = ((SpanQuery *)inc->data)->field;
1450
+
1451
+ self->type = SPAN_NOT_QUERY;
1452
+ self->data = sq;
1453
+ self->create_weight = &spanw_create;
1454
+ self->extract_terms = &spanxq_extract_terms;
1455
+ self->rewrite = &spanxq_rewrite;
1456
+ self->to_s = &spanxq_to_s;
1457
+ self->destroy = &spanxq_destroy;
1458
+ return self;
1459
+ }
1460
+
1461
+ /***************************************************************************
1462
+ *
1463
+ * SpanScorer
1464
+ *
1465
+ ***************************************************************************/
1466
+
1467
+ float spansc_score(Scorer *self)
1468
+ {
1469
+ SpanScorer *spansc = (SpanScorer *)self->data;
1470
+ float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
1471
+
1472
+ /* normalize */
1473
+ return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
1474
+ }
1475
+
1476
+ bool spansc_next(Scorer *self)
1477
+ {
1478
+ SpanScorer *spansc = (SpanScorer *)self->data;
1479
+ SpanEnum *se = spansc->spans;
1480
+ int match_length;
1481
+
1482
+ if (spansc->first_time) {
1483
+ spansc->more = se->next(se);
1484
+ spansc->first_time = false;
1485
+ }
1486
+
1487
+ if (!spansc->more) return false;
1488
+
1489
+ spansc->freq = 0.0;
1490
+ self->doc = se->doc(se);
1491
+
1492
+ while (spansc->more && (self->doc == se->doc(se))) {
1493
+ match_length = se->end(se) - se->start(se);
1494
+ spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
1495
+ spansc->more = se->next(se);
1496
+ }
1497
+
1498
+ return (spansc->more || (spansc->freq != 0.0));
1499
+ }
1500
+
1501
+ bool spansc_skip_to(Scorer *self, int target)
1502
+ {
1503
+ SpanScorer *spansc = (SpanScorer *)self->data;
1504
+ SpanEnum *se = spansc->spans;
1505
+
1506
+ spansc->more = se->skip_to(se, target);
1507
+
1508
+ if (!spansc->more) return false;
1509
+
1510
+ spansc->freq = 0.0;
1511
+ self->doc = se->doc(se);
1512
+
1513
+ while (spansc->more && (se->doc(se) == target)) {
1514
+ spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
1515
+ spansc->more = se->next(se);
1516
+ }
1517
+
1518
+ return (spansc->more || (spansc->freq != 0.0));
1519
+ }
1520
+
1521
+ Explanation *spansc_explain(Scorer *self, int target)
1522
+ {
1523
+ SpanScorer *spansc = (SpanScorer *)self->data;
1524
+ float phrase_freq;
1525
+ self->skip_to(self, target);
1526
+ phrase_freq = (self->doc == target) ? spansc->freq : 0.0;
1527
+
1528
+ Explanation *tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1529
+ epstrdup("tf(phrase_freq(%#.5g)", 32, phrase_freq));
1530
+
1531
+ return tf_explanation;
1532
+ }
1533
+
1534
+ void spansc_destroy(void *p)
1535
+ {
1536
+ Scorer *self = (Scorer *)p;
1537
+ SpanScorer *spansc = (SpanScorer *)self->data;
1538
+ if (spansc->spans) spansc->spans->destroy(spansc->spans);
1539
+ //free(spansc->norms);
1540
+ scorer_destroy(p);
1541
+ }
1542
+
1543
+ Scorer *spansc_create(Weight *weight, IndexReader *ir)
1544
+ {
1545
+ Scorer *self = scorer_create(weight->similarity);
1546
+ SpanScorer *spansc = ALLOC(SpanScorer);
1547
+ SpanQuery *spanq = (SpanQuery *)weight->query->data;
1548
+ ZEROSET(spansc, SpanScorer, 1);
1549
+ spansc->first_time = true;
1550
+ spansc->more = true;
1551
+ spansc->spans = spanq->get_spans(weight->query, ir);
1552
+ spansc->sim = weight->similarity;
1553
+ spansc->norms = ir->get_norms(ir, spanq->field);
1554
+ spansc->weight = weight;
1555
+ spansc->value = weight->value;
1556
+ spansc->freq = 0.0;
1557
+
1558
+ self->data = spansc;
1559
+
1560
+ self->score = &spansc_score;
1561
+ self->next = &spansc_next;
1562
+ self->skip_to = &spansc_skip_to;
1563
+ self->explain = &spansc_explain;
1564
+ self->destroy = &spansc_destroy;
1565
+ return self;
1566
+ }