ferret 0.11.6 → 0.11.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/q_span.c
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
#include <limits.h>
|
3
3
|
#include "search.h"
|
4
4
|
#include "hashset.h"
|
5
|
+
#include "symbol.h"
|
6
|
+
#include "internal.h"
|
5
7
|
|
6
8
|
#define CLAUSE_INIT_CAPA 4
|
7
9
|
|
@@ -19,12 +21,12 @@
|
|
19
21
|
|
20
22
|
static unsigned long spanq_hash(Query *self)
|
21
23
|
{
|
22
|
-
return
|
24
|
+
return SpQ(self)->field ? sym_hash(SpQ(self)->field) : 0;
|
23
25
|
}
|
24
26
|
|
25
27
|
static int spanq_eq(Query *self, Query *o)
|
26
28
|
{
|
27
|
-
return
|
29
|
+
return SpQ(self)->field == SpQ(o)->field;
|
28
30
|
}
|
29
31
|
|
30
32
|
static void spanq_destroy_i(Query *self)
|
@@ -35,15 +37,14 @@ static void spanq_destroy_i(Query *self)
|
|
35
37
|
static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
|
36
38
|
HashSet *terms, TermVector *tv)
|
37
39
|
{
|
38
|
-
|
39
|
-
for (
|
40
|
-
char *term = (char *)
|
40
|
+
HashSetEntry *hse;
|
41
|
+
for (hse = terms->first; hse; hse = hse->next) {
|
42
|
+
char *term = (char *)hse->elem;
|
41
43
|
TVTerm *tv_term = tv_get_tv_term(tv, term);
|
42
44
|
if (tv_term) {
|
43
|
-
int
|
44
|
-
|
45
|
-
|
46
|
-
int pos = tv_term->positions[j];
|
45
|
+
int i, m_idx = 0;
|
46
|
+
for (i = 0; i < tv_term->freq; i++) {
|
47
|
+
int pos = tv_term->positions[i];
|
47
48
|
for (; m_idx < full_mv->size; m_idx++) {
|
48
49
|
if (pos <= full_mv->matches[m_idx].end) {
|
49
50
|
if (pos >= full_mv->matches[m_idx].start) {
|
@@ -134,29 +135,31 @@ static int tv_tde_doc_num(TermDocEnum *tde)
|
|
134
135
|
static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
|
135
136
|
{
|
136
137
|
TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
|
137
|
-
TermDocEnum *tde
|
138
|
-
tv_tde->tv
|
139
|
-
tde->seek
|
140
|
-
tde->doc_num
|
141
|
-
tde->freq
|
142
|
-
tde->next
|
143
|
-
tde->skip_to
|
144
|
-
tde->next_position
|
145
|
-
tde->close
|
146
|
-
|
138
|
+
TermDocEnum *tde = (TermDocEnum *)tv_tde;
|
139
|
+
tv_tde->tv = (TermVector *)ir->store;
|
140
|
+
tde->seek = &tv_tde_seek;
|
141
|
+
tde->doc_num = &tv_tde_doc_num;
|
142
|
+
tde->freq = &tv_tde_freq;
|
143
|
+
tde->next = &tv_tde_next;
|
144
|
+
tde->skip_to = &tv_tde_skip_to;
|
145
|
+
tde->next_position = &tv_tde_next_position;
|
146
|
+
tde->close = (void (*)(TermDocEnum *tde))&free;
|
147
|
+
|
147
148
|
return tde;
|
148
149
|
}
|
149
150
|
|
150
151
|
static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
|
151
152
|
TermVector *tv)
|
152
153
|
{
|
153
|
-
if (
|
154
|
+
if (SpQ(self)->field == tv->field) {
|
154
155
|
SpanEnum *sp_enum;
|
155
156
|
IndexReader *ir = ALLOC(IndexReader);
|
156
157
|
MatchVector *full_mv = matchv_new();
|
157
158
|
HashSet *terms = SpQ(self)->get_terms(self);
|
158
|
-
|
159
|
-
|
159
|
+
/* FIXME What is going on here? Need to document this! */
|
160
|
+
ir->fis = fis_new(STORE_NO, INDEX_NO, TERM_VECTOR_NO);
|
161
|
+
fis_add_field(ir->fis,
|
162
|
+
fi_new(tv->field, STORE_NO, INDEX_NO, TERM_VECTOR_NO));
|
160
163
|
ir->store = (Store *)tv;
|
161
164
|
ir->term_positions = &spanq_ir_term_positions;
|
162
165
|
sp_enum = SpQ(self)->get_spans(self, ir);
|
@@ -166,7 +169,7 @@ static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
|
|
166
169
|
sp_enum->end(sp_enum) - 1);
|
167
170
|
}
|
168
171
|
sp_enum->destroy(sp_enum);
|
169
|
-
|
172
|
+
|
170
173
|
fis_deref(ir->fis);
|
171
174
|
free(ir);
|
172
175
|
|
@@ -226,11 +229,11 @@ static bool spansc_next(Scorer *self)
|
|
226
229
|
spansc->freq = 0.0;
|
227
230
|
self->doc = se->doc(se);
|
228
231
|
|
229
|
-
|
232
|
+
do {
|
230
233
|
match_length = se->end(se) - se->start(se);
|
231
234
|
spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
|
232
235
|
spansc->more = se->next(se);
|
233
|
-
}
|
236
|
+
} while (spansc->more && (self->doc == se->doc(se)));
|
234
237
|
|
235
238
|
return (spansc->more || (spansc->freq != 0.0));
|
236
239
|
}
|
@@ -241,7 +244,6 @@ static bool spansc_skip_to(Scorer *self, int target)
|
|
241
244
|
SpanEnum *se = spansc->spans;
|
242
245
|
|
243
246
|
spansc->more = se->skip_to(se, target);
|
244
|
-
|
245
247
|
if (!spansc->more) {
|
246
248
|
return false;
|
247
249
|
}
|
@@ -252,6 +254,9 @@ static bool spansc_skip_to(Scorer *self, int target)
|
|
252
254
|
while (spansc->more && (se->doc(se) == target)) {
|
253
255
|
spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
|
254
256
|
spansc->more = se->next(se);
|
257
|
+
if (spansc->first_time) {
|
258
|
+
spansc->first_time = false;
|
259
|
+
}
|
255
260
|
}
|
256
261
|
|
257
262
|
return (spansc->more || (spansc->freq != 0.0));
|
@@ -280,7 +285,7 @@ static void spansc_destroy(Scorer *self)
|
|
280
285
|
scorer_destroy_i(self);
|
281
286
|
}
|
282
287
|
|
283
|
-
Scorer *spansc_new(Weight *weight, IndexReader *ir)
|
288
|
+
static Scorer *spansc_new(Weight *weight, IndexReader *ir)
|
284
289
|
{
|
285
290
|
Scorer *self = NULL;
|
286
291
|
const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
|
@@ -349,9 +354,12 @@ static bool spante_skip_to(SpanEnum *self, int target)
|
|
349
354
|
TermDocEnum *tde = ste->positions;
|
350
355
|
|
351
356
|
/* are we already at the correct position? */
|
357
|
+
/* FIXME: perhaps this is the better solution, but currently ->skip_to
|
358
|
+
* does a ->next no matter what
|
352
359
|
if (ste->doc >= target) {
|
353
360
|
return true;
|
354
361
|
}
|
362
|
+
*/
|
355
363
|
|
356
364
|
if (! tde->skip_to(tde, target)) {
|
357
365
|
ste->doc = INT_MAX;
|
@@ -382,10 +390,9 @@ static int spante_end(SpanEnum *self)
|
|
382
390
|
return SpTEn(self)->position + 1;
|
383
391
|
}
|
384
392
|
|
385
|
-
static char *spante_to_s(SpanEnum *self)
|
393
|
+
static char *spante_to_s(SpanEnum *self)
|
386
394
|
{
|
387
|
-
char *
|
388
|
-
char *query_str = self->query->to_s(self->query, field);
|
395
|
+
char *query_str = self->query->to_s(self->query, NULL);
|
389
396
|
char pos_str[20];
|
390
397
|
size_t len = strlen(query_str);
|
391
398
|
int pos;
|
@@ -403,7 +410,7 @@ static char *spante_to_s(SpanEnum *self)
|
|
403
410
|
sprintf(pos_str, "%d", self->doc(self) - pos);
|
404
411
|
}
|
405
412
|
}
|
406
|
-
sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
|
413
|
+
sprintf(str, "SpanTermEnum(%s)@%s", query_str, pos_str);
|
407
414
|
free(query_str);
|
408
415
|
return str;
|
409
416
|
}
|
@@ -418,10 +425,10 @@ static void spante_destroy(SpanEnum *self)
|
|
418
425
|
static SpanEnum *spante_new(Query *query, IndexReader *ir)
|
419
426
|
{
|
420
427
|
char *term = SpTQ(query)->term;
|
421
|
-
|
422
|
-
SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanTermEnum));
|
428
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanTermEnum);
|
423
429
|
|
424
|
-
SpTEn(self)->positions = ir_term_positions_for(ir, field,
|
430
|
+
SpTEn(self)->positions = ir_term_positions_for(ir, SpQ(query)->field,
|
431
|
+
term);
|
425
432
|
SpTEn(self)->position = -1;
|
426
433
|
SpTEn(self)->doc = -1;
|
427
434
|
SpTEn(self)->count = 0;
|
@@ -531,7 +538,7 @@ static bool spanmte_next(SpanEnum *self)
|
|
531
538
|
}
|
532
539
|
mte->tpew_pq = tpew_pq;
|
533
540
|
}
|
534
|
-
|
541
|
+
|
535
542
|
tpew = (TermPosEnumWrapper *)pq_top(tpew_pq);
|
536
543
|
if (tpew == NULL) {
|
537
544
|
return false;
|
@@ -612,8 +619,7 @@ static void spanmte_destroy(SpanEnum *self)
|
|
612
619
|
|
613
620
|
static SpanEnum *spanmte_new(Query *query, IndexReader *ir)
|
614
621
|
{
|
615
|
-
|
616
|
-
SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanMultiTermEnum));
|
622
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanMultiTermEnum);
|
617
623
|
SpanMultiTermEnum *smte = SpMTEn(self);
|
618
624
|
SpanMultiTermQuery *smtq = SpMTQ(query);
|
619
625
|
int i;
|
@@ -622,7 +628,8 @@ static SpanEnum *spanmte_new(Query *query, IndexReader *ir)
|
|
622
628
|
smte->tpews = ALLOC_N(TermPosEnumWrapper *, smtq->term_cnt);
|
623
629
|
for (i = 0; i < smtq->term_cnt; i++) {
|
624
630
|
char *term = smtq->terms[i];
|
625
|
-
smte->tpews[i] = tpew_new(term,
|
631
|
+
smte->tpews[i] = tpew_new(term,
|
632
|
+
ir_term_positions_for(ir, SpQ(query)->field, term));
|
626
633
|
}
|
627
634
|
smte->tpew_cnt = smtq->term_cnt;
|
628
635
|
smte->tpew_pq = NULL;
|
@@ -681,7 +688,7 @@ static bool spanfe_skip_to(SpanEnum *self, int target)
|
|
681
688
|
return true;
|
682
689
|
}
|
683
690
|
|
684
|
-
return
|
691
|
+
return spanfe_next(self); /* scan to next match */
|
685
692
|
}
|
686
693
|
|
687
694
|
static int spanfe_doc(SpanEnum *self)
|
@@ -702,10 +709,9 @@ static int spanfe_end(SpanEnum *self)
|
|
702
709
|
return sub_enum->end(sub_enum);
|
703
710
|
}
|
704
711
|
|
705
|
-
static char *spanfe_to_s(SpanEnum *self)
|
712
|
+
static char *spanfe_to_s(SpanEnum *self)
|
706
713
|
{
|
707
|
-
char *
|
708
|
-
char *query_str = self->query->to_s(self->query, field);
|
714
|
+
char *query_str = self->query->to_s(self->query, NULL);
|
709
715
|
char *res = strfmt("SpanFirstEnum(%s)", query_str);
|
710
716
|
free(query_str);
|
711
717
|
return res;
|
@@ -720,7 +726,7 @@ static void spanfe_destroy(SpanEnum *self)
|
|
720
726
|
|
721
727
|
static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
|
722
728
|
{
|
723
|
-
SpanEnum *self = (SpanEnum *)
|
729
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanFirstEnum);
|
724
730
|
SpanFirstQuery *sfq = SpFQ(query);
|
725
731
|
|
726
732
|
SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
|
@@ -822,7 +828,8 @@ static bool spanoe_skip_to(SpanEnum *self, int target)
|
|
822
828
|
}
|
823
829
|
else {
|
824
830
|
while ((soe->queue->size != 0) &&
|
825
|
-
((se = (SpanEnum *)pq_top(soe->queue))
|
831
|
+
((se = (SpanEnum *)pq_top(soe->queue)) != NULL) &&
|
832
|
+
(se->doc(se) < target)) {
|
826
833
|
if (se->skip_to(se, target)) {
|
827
834
|
pq_down(soe->queue);
|
828
835
|
}
|
@@ -855,11 +862,10 @@ static int spanoe_end(SpanEnum *self)
|
|
855
862
|
return se->end(se);
|
856
863
|
}
|
857
864
|
|
858
|
-
static char *spanoe_to_s(SpanEnum *self)
|
865
|
+
static char *spanoe_to_s(SpanEnum *self)
|
859
866
|
{
|
860
867
|
SpanOrEnum *soe = SpOEn(self);
|
861
|
-
char *
|
862
|
-
char *query_str = self->query->to_s(self->query, field);
|
868
|
+
char *query_str = self->query->to_s(self->query, NULL);
|
863
869
|
char doc_str[62];
|
864
870
|
size_t len = strlen(query_str);
|
865
871
|
char *str = ALLOC_N(char, len + 80);
|
@@ -876,7 +882,7 @@ static char *spanoe_to_s(SpanEnum *self)
|
|
876
882
|
self->start(self), self->end(self));
|
877
883
|
}
|
878
884
|
}
|
879
|
-
sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
|
885
|
+
sprintf(str, "SpanOrEnum(%s)@%s", query_str, doc_str);
|
880
886
|
free(query_str);
|
881
887
|
return str;
|
882
888
|
}
|
@@ -895,10 +901,10 @@ static void spanoe_destroy(SpanEnum *self)
|
|
895
901
|
free(self);
|
896
902
|
}
|
897
903
|
|
898
|
-
SpanEnum *spanoe_new(Query *query, IndexReader *ir)
|
904
|
+
static SpanEnum *spanoe_new(Query *query, IndexReader *ir)
|
899
905
|
{
|
900
906
|
Query *clause;
|
901
|
-
SpanEnum *self = (SpanEnum *)
|
907
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanOrEnum);
|
902
908
|
SpanOrQuery *soq = SpOQ(query);
|
903
909
|
int i;
|
904
910
|
|
@@ -1168,11 +1174,10 @@ static int spanne_end(SpanEnum *self)
|
|
1168
1174
|
return SpNEn(self)->end;
|
1169
1175
|
}
|
1170
1176
|
|
1171
|
-
static char *spanne_to_s(SpanEnum *self)
|
1177
|
+
static char *spanne_to_s(SpanEnum *self)
|
1172
1178
|
{
|
1173
1179
|
SpanNearEnum *sne = SpNEn(self);
|
1174
|
-
char *
|
1175
|
-
char *query_str = self->query->to_s(self->query, field);
|
1180
|
+
char *query_str = self->query->to_s(self->query, NULL);
|
1176
1181
|
char doc_str[62];
|
1177
1182
|
size_t len = strlen(query_str);
|
1178
1183
|
char *str = ALLOC_N(char, len + 80);
|
@@ -1184,7 +1189,7 @@ static char *spanne_to_s(SpanEnum *self)
|
|
1184
1189
|
sprintf(doc_str, "%d:%d-%d", self->doc(self),
|
1185
1190
|
self->start(self), self->end(self));
|
1186
1191
|
}
|
1187
|
-
sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
|
1192
|
+
sprintf(str, "SpanNearEnum(%s)@%s", query_str, doc_str);
|
1188
1193
|
free(query_str);
|
1189
1194
|
return str;
|
1190
1195
|
}
|
@@ -1206,7 +1211,7 @@ static SpanEnum *spanne_new(Query *query, IndexReader *ir)
|
|
1206
1211
|
{
|
1207
1212
|
int i;
|
1208
1213
|
Query *clause;
|
1209
|
-
SpanEnum *self = (SpanEnum *)
|
1214
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanNearEnum);
|
1210
1215
|
SpanNearQuery *snq = SpNQ(query);
|
1211
1216
|
|
1212
1217
|
SpNEn(self)->first_time = true;
|
@@ -1333,10 +1338,9 @@ static int spanxe_end(SpanEnum *self)
|
|
1333
1338
|
return inc->end(inc);
|
1334
1339
|
}
|
1335
1340
|
|
1336
|
-
static char *spanxe_to_s(SpanEnum *self)
|
1341
|
+
static char *spanxe_to_s(SpanEnum *self)
|
1337
1342
|
{
|
1338
|
-
char *
|
1339
|
-
char *query_str = self->query->to_s(self->query, field);
|
1343
|
+
char *query_str = self->query->to_s(self->query, NULL);
|
1340
1344
|
char *res = strfmt("SpanNotEnum(%s)", query_str);
|
1341
1345
|
free(query_str);
|
1342
1346
|
return res;
|
@@ -1352,7 +1356,7 @@ static void spanxe_destroy(SpanEnum *self)
|
|
1352
1356
|
|
1353
1357
|
static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
|
1354
1358
|
{
|
1355
|
-
SpanEnum *self = (SpanEnum *)
|
1359
|
+
SpanEnum *self = (SpanEnum *)ALLOC(SpanNotEnum);
|
1356
1360
|
SpanNotEnum *sxe = SpXEn(self);
|
1357
1361
|
SpanNotQuery *sxq = SpXQ(query);
|
1358
1362
|
|
@@ -1399,27 +1403,26 @@ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
1399
1403
|
uchar *field_norms;
|
1400
1404
|
float field_norm;
|
1401
1405
|
Explanation *field_norm_expl;
|
1406
|
+
const char *field = S(SpQ(self->query)->field);
|
1402
1407
|
|
1403
1408
|
char *query_str;
|
1404
1409
|
HashSet *terms = SpW(self)->terms;
|
1405
|
-
|
1406
|
-
const int field_num = fis_get_field_num(ir->fis, field);
|
1410
|
+
const int field_num = fis_get_field_num(ir->fis, SpQ(self->query)->field);
|
1407
1411
|
char *doc_freqs = NULL;
|
1408
1412
|
size_t df_i = 0;
|
1409
|
-
|
1413
|
+
HashSetEntry *hse;
|
1410
1414
|
|
1411
1415
|
if (field_num < 0) {
|
1412
1416
|
return expl_new(0.0, "field \"%s\" does not exist in the index", field);
|
1413
1417
|
}
|
1414
1418
|
|
1415
|
-
query_str = self->query->to_s(self->query,
|
1419
|
+
query_str = self->query->to_s(self->query, NULL);
|
1416
1420
|
|
1417
|
-
for (
|
1418
|
-
char *term = (char *)
|
1421
|
+
for (hse = terms->first; hse; hse = hse->next) {
|
1422
|
+
char *term = (char *)hse->elem;
|
1419
1423
|
REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
|
1420
|
-
sprintf(doc_freqs + df_i, "%s=%d, ", term,
|
1421
|
-
|
1422
|
-
df_i = strlen(doc_freqs);
|
1424
|
+
df_i += sprintf(doc_freqs + df_i, "%s=%d, ", term,
|
1425
|
+
ir->doc_freq(ir, field_num, term));
|
1423
1426
|
}
|
1424
1427
|
/* remove the ',' at the end of the string if it exists */
|
1425
1428
|
if (terms->size > 0) {
|
@@ -1468,8 +1471,8 @@ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
1468
1471
|
expl_add_detail(field_expl, idf_expl2);
|
1469
1472
|
|
1470
1473
|
field_norms = ir->get_norms(ir, field_num);
|
1471
|
-
field_norm = (field_norms
|
1472
|
-
? sim_decode_norm(self->similarity, field_norms[target])
|
1474
|
+
field_norm = (field_norms
|
1475
|
+
? sim_decode_norm(self->similarity, field_norms[target])
|
1473
1476
|
: (float)0.0);
|
1474
1477
|
field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
|
1475
1478
|
field, target);
|
@@ -1502,7 +1505,7 @@ static void spanw_destroy(Weight *self)
|
|
1502
1505
|
|
1503
1506
|
static Weight *spanw_new(Query *query, Searcher *searcher)
|
1504
1507
|
{
|
1505
|
-
|
1508
|
+
HashSetEntry *hse;
|
1506
1509
|
Weight *self = w_new(SpanWeight, query);
|
1507
1510
|
HashSet *terms = SpQ(query)->get_terms(query);
|
1508
1511
|
|
@@ -1515,10 +1518,10 @@ static Weight *spanw_new(Query *query, Searcher *searcher)
|
|
1515
1518
|
self->similarity = query->get_similarity(query, searcher);
|
1516
1519
|
|
1517
1520
|
self->idf = 0.0;
|
1518
|
-
|
1519
|
-
for (
|
1520
|
-
self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
|
1521
|
-
(char *)
|
1521
|
+
|
1522
|
+
for (hse = terms->first; hse; hse = hse->next) {
|
1523
|
+
self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
|
1524
|
+
(char *)hse->elem, searcher);
|
1522
1525
|
}
|
1523
1526
|
|
1524
1527
|
return self;
|
@@ -1528,20 +1531,19 @@ static Weight *spanw_new(Query *query, Searcher *searcher)
|
|
1528
1531
|
* SpanTermQuery
|
1529
1532
|
*****************************************************************************/
|
1530
1533
|
|
1531
|
-
static char *spantq_to_s(Query *self,
|
1534
|
+
static char *spantq_to_s(Query *self, Symbol default_field)
|
1532
1535
|
{
|
1533
|
-
if (
|
1536
|
+
if (default_field && default_field == SpQ(self)->field) {
|
1534
1537
|
return strfmt("span_terms(%s)", SpTQ(self)->term);
|
1535
1538
|
}
|
1536
1539
|
else {
|
1537
|
-
return strfmt("span_terms(%s:%s)", SpQ(self)->field, SpTQ(self)->term);
|
1540
|
+
return strfmt("span_terms(%s:%s)", S(SpQ(self)->field), SpTQ(self)->term);
|
1538
1541
|
}
|
1539
1542
|
}
|
1540
1543
|
|
1541
1544
|
static void spantq_destroy_i(Query *self)
|
1542
1545
|
{
|
1543
1546
|
free(SpTQ(self)->term);
|
1544
|
-
free(SpQ(self)->field);
|
1545
1547
|
spanq_destroy_i(self);
|
1546
1548
|
}
|
1547
1549
|
|
@@ -1567,12 +1569,12 @@ static int spantq_eq(Query *self, Query *o)
|
|
1567
1569
|
return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
|
1568
1570
|
}
|
1569
1571
|
|
1570
|
-
Query *spantq_new(
|
1572
|
+
Query *spantq_new(Symbol field, const char *term)
|
1571
1573
|
{
|
1572
1574
|
Query *self = q_new(SpanTermQuery);
|
1573
1575
|
|
1574
1576
|
SpTQ(self)->term = estrdup(term);
|
1575
|
-
SpQ(self)->field =
|
1577
|
+
SpQ(self)->field = field;
|
1576
1578
|
SpQ(self)->get_spans = &spante_new;
|
1577
1579
|
SpQ(self)->get_terms = &spantq_get_terms;
|
1578
1580
|
|
@@ -1591,10 +1593,10 @@ Query *spantq_new(const char *field, const char *term)
|
|
1591
1593
|
* SpanMultiTermQuery
|
1592
1594
|
*****************************************************************************/
|
1593
1595
|
|
1594
|
-
static char *spanmtq_to_s(Query *self,
|
1596
|
+
static char *spanmtq_to_s(Query *self, Symbol field)
|
1595
1597
|
{
|
1596
1598
|
char *terms = NULL, *p;
|
1597
|
-
int len =
|
1599
|
+
int len = 3, i;
|
1598
1600
|
SpanMultiTermQuery *smtq = SpMTQ(self);
|
1599
1601
|
for (i = 0; i < smtq->term_cnt; i++) {
|
1600
1602
|
len += strlen(smtq->terms[i]) + 2;
|
@@ -1602,11 +1604,10 @@ static char *spanmtq_to_s(Query *self, const char *field)
|
|
1602
1604
|
p = terms = ALLOC_N(char, len);
|
1603
1605
|
*(p++) = '[';
|
1604
1606
|
for (i = 0; i < smtq->term_cnt; i++) {
|
1607
|
+
if (i != 0) *(p++) = ',';
|
1605
1608
|
strcpy(p, smtq->terms[i]);
|
1606
1609
|
p += strlen(smtq->terms[i]);
|
1607
|
-
*(p++) = ',';
|
1608
1610
|
}
|
1609
|
-
if (p > terms) p--;
|
1610
1611
|
*(p++) = ']';
|
1611
1612
|
*p = '\0';
|
1612
1613
|
|
@@ -1614,7 +1615,7 @@ static char *spanmtq_to_s(Query *self, const char *field)
|
|
1614
1615
|
p = strfmt("span_terms(%s)", terms);
|
1615
1616
|
}
|
1616
1617
|
else {
|
1617
|
-
p = strfmt("span_terms(%s:%s)", SpQ(self)->field, terms);
|
1618
|
+
p = strfmt("span_terms(%s:%s)", S(SpQ(self)->field), terms);
|
1618
1619
|
}
|
1619
1620
|
free(terms);
|
1620
1621
|
return p;
|
@@ -1628,7 +1629,6 @@ static void spanmtq_destroy_i(Query *self)
|
|
1628
1629
|
free(smtq->terms[i]);
|
1629
1630
|
}
|
1630
1631
|
free(smtq->terms);
|
1631
|
-
free(SpQ(self)->field);
|
1632
1632
|
spanq_destroy_i(self);
|
1633
1633
|
}
|
1634
1634
|
|
@@ -1676,7 +1676,7 @@ static int spanmtq_eq(Query *self, Query *o)
|
|
1676
1676
|
return true;;
|
1677
1677
|
}
|
1678
1678
|
|
1679
|
-
Query *spanmtq_new_conf(
|
1679
|
+
Query *spanmtq_new_conf(Symbol field, int max_terms)
|
1680
1680
|
{
|
1681
1681
|
Query *self = q_new(SpanMultiTermQuery);
|
1682
1682
|
|
@@ -1684,7 +1684,7 @@ Query *spanmtq_new_conf(const char *field, int max_terms)
|
|
1684
1684
|
SpMTQ(self)->term_cnt = 0;
|
1685
1685
|
SpMTQ(self)->term_capa = max_terms;
|
1686
1686
|
|
1687
|
-
SpQ(self)->field =
|
1687
|
+
SpQ(self)->field = field;
|
1688
1688
|
SpQ(self)->get_spans = &spanmte_new;
|
1689
1689
|
SpQ(self)->get_terms = &spanmtq_get_terms;
|
1690
1690
|
|
@@ -1700,7 +1700,7 @@ Query *spanmtq_new_conf(const char *field, int max_terms)
|
|
1700
1700
|
return self;
|
1701
1701
|
}
|
1702
1702
|
|
1703
|
-
Query *spanmtq_new(
|
1703
|
+
Query *spanmtq_new(Symbol field)
|
1704
1704
|
{
|
1705
1705
|
return spanmtq_new_conf(field, SPAN_MULTI_TERM_QUERY_CAPA);
|
1706
1706
|
}
|
@@ -1719,7 +1719,7 @@ void spanmtq_add_term(Query *self, const char *term)
|
|
1719
1719
|
*
|
1720
1720
|
*****************************************************************************/
|
1721
1721
|
|
1722
|
-
static char *spanfq_to_s(Query *self,
|
1722
|
+
static char *spanfq_to_s(Query *self, Symbol field)
|
1723
1723
|
{
|
1724
1724
|
Query *match = SpFQ(self)->match;
|
1725
1725
|
char *q_str = match->to_s(match, field);
|
@@ -1755,7 +1755,6 @@ static Query *spanfq_rewrite(Query *self, IndexReader *ir)
|
|
1755
1755
|
static void spanfq_destroy_i(Query *self)
|
1756
1756
|
{
|
1757
1757
|
q_deref(SpFQ(self)->match);
|
1758
|
-
free(SpQ(self)->field);
|
1759
1758
|
spanq_destroy_i(self);
|
1760
1759
|
}
|
1761
1760
|
|
@@ -1780,7 +1779,7 @@ Query *spanfq_new_nr(Query *match, int end)
|
|
1780
1779
|
SpFQ(self)->match = match;
|
1781
1780
|
SpFQ(self)->end = end;
|
1782
1781
|
|
1783
|
-
SpQ(self)->field =
|
1782
|
+
SpQ(self)->field = SpQ(match)->field;
|
1784
1783
|
SpQ(self)->get_spans = &spanfe_new;
|
1785
1784
|
SpQ(self)->get_terms = &spanfq_get_terms;
|
1786
1785
|
|
@@ -1809,7 +1808,7 @@ Query *spanfq_new(Query *match, int end)
|
|
1809
1808
|
*
|
1810
1809
|
*****************************************************************************/
|
1811
1810
|
|
1812
|
-
static char *spanoq_to_s(Query *self,
|
1811
|
+
static char *spanoq_to_s(Query *self, Symbol field)
|
1813
1812
|
{
|
1814
1813
|
int i;
|
1815
1814
|
SpanOrQuery *soq = SpOQ(self);
|
@@ -1823,16 +1822,16 @@ static char *spanoq_to_s(Query *self, const char *field)
|
|
1823
1822
|
}
|
1824
1823
|
|
1825
1824
|
res_p = res = ALLOC_N(char, len);
|
1826
|
-
sprintf(res_p, "span_or[
|
1827
|
-
res_p += strlen(res_p);
|
1825
|
+
res_p += sprintf(res_p, "span_or[");
|
1828
1826
|
for (i = 0; i < soq->c_cnt; i++) {
|
1829
|
-
|
1827
|
+
if (i != 0) *(res_p++) = ',';
|
1828
|
+
res_p += sprintf(res_p, "%s", q_strs[i]);
|
1830
1829
|
free(q_strs[i]);
|
1831
|
-
res_p += strlen(res_p);
|
1832
1830
|
}
|
1833
1831
|
free(q_strs);
|
1834
1832
|
|
1835
|
-
|
1833
|
+
*(res_p)++ = ']';
|
1834
|
+
*res_p = 0;
|
1836
1835
|
return res;
|
1837
1836
|
}
|
1838
1837
|
|
@@ -1898,7 +1897,6 @@ static void spanoq_destroy_i(Query *self)
|
|
1898
1897
|
q_deref(clause);
|
1899
1898
|
}
|
1900
1899
|
free(soq->clauses);
|
1901
|
-
free(SpQ(self)->field);
|
1902
1900
|
|
1903
1901
|
spanq_destroy_i(self);
|
1904
1902
|
}
|
@@ -1942,7 +1940,7 @@ Query *spanoq_new()
|
|
1942
1940
|
SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
|
1943
1941
|
SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
|
1944
1942
|
|
1945
|
-
SpQ(self)->field =
|
1943
|
+
SpQ(self)->field = NULL;
|
1946
1944
|
SpQ(self)->get_spans = &spanoq_get_spans;
|
1947
1945
|
SpQ(self)->get_terms = &spanoq_get_terms;
|
1948
1946
|
|
@@ -1967,13 +1965,12 @@ Query *spanoq_add_clause_nr(Query *self, Query *clause)
|
|
1967
1965
|
"SpanQuery.", q_get_query_name(clause->type));
|
1968
1966
|
}
|
1969
1967
|
if (curr_index == 0) {
|
1970
|
-
|
1971
|
-
SpQ(self)->field = estrdup(SpQ(clause)->field);
|
1968
|
+
SpQ(self)->field = SpQ(clause)->field;
|
1972
1969
|
}
|
1973
|
-
else if (
|
1970
|
+
else if (SpQ(self)->field != SpQ(clause)->field) {
|
1974
1971
|
RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
|
1975
1972
|
"Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
|
1976
|
-
"with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
|
1973
|
+
"with field \"%s\"", S(SpQ(clause)->field), S(SpQ(self)->field));
|
1977
1974
|
}
|
1978
1975
|
if (curr_index >= SpOQ(self)->c_capa) {
|
1979
1976
|
SpOQ(self)->c_capa <<= 1;
|
@@ -1995,7 +1992,7 @@ Query *spanoq_add_clause(Query *self, Query *clause)
|
|
1995
1992
|
*
|
1996
1993
|
*****************************************************************************/
|
1997
1994
|
|
1998
|
-
static char *spannq_to_s(Query *self,
|
1995
|
+
static char *spannq_to_s(Query *self, Symbol field)
|
1999
1996
|
{
|
2000
1997
|
int i;
|
2001
1998
|
SpanNearQuery *snq = SpNQ(self);
|
@@ -2009,16 +2006,16 @@ static char *spannq_to_s(Query *self, const char *field)
|
|
2009
2006
|
}
|
2010
2007
|
|
2011
2008
|
res_p = res = ALLOC_N(char, len);
|
2012
|
-
sprintf(res_p, "span_near[
|
2013
|
-
res_p += strlen(res_p);
|
2009
|
+
res_p += sprintf(res_p, "span_near[");
|
2014
2010
|
for (i = 0; i < snq->c_cnt; i++) {
|
2015
|
-
|
2011
|
+
if (i != 0) *(res_p)++ = ',';
|
2012
|
+
res_p += sprintf(res_p, "%s", q_strs[i]);
|
2016
2013
|
free(q_strs[i]);
|
2017
|
-
res_p += strlen(res_p);
|
2018
2014
|
}
|
2019
2015
|
free(q_strs);
|
2020
2016
|
|
2021
|
-
|
2017
|
+
*(res_p++) = ']';
|
2018
|
+
*res_p = 0;
|
2022
2019
|
return res;
|
2023
2020
|
}
|
2024
2021
|
|
@@ -2083,7 +2080,6 @@ static void spannq_destroy(Query *self)
|
|
2083
2080
|
q_deref(clause);
|
2084
2081
|
}
|
2085
2082
|
free(snq->clauses);
|
2086
|
-
free(SpQ(self)->field);
|
2087
2083
|
|
2088
2084
|
spanq_destroy_i(self);
|
2089
2085
|
}
|
@@ -2136,7 +2132,7 @@ Query *spannq_new(int slop, bool in_order)
|
|
2136
2132
|
|
2137
2133
|
SpQ(self)->get_spans = &spannq_get_spans;
|
2138
2134
|
SpQ(self)->get_terms = &spannq_get_terms;
|
2139
|
-
SpQ(self)->field =
|
2135
|
+
SpQ(self)->field = NULL;
|
2140
2136
|
|
2141
2137
|
self->type = SPAN_NEAR_QUERY;
|
2142
2138
|
self->rewrite = &spannq_rewrite;
|
@@ -2159,13 +2155,12 @@ Query *spannq_add_clause_nr(Query *self, Query *clause)
|
|
2159
2155
|
"SpanQuery.", q_get_query_name(clause->type));
|
2160
2156
|
}
|
2161
2157
|
if (curr_index == 0) {
|
2162
|
-
|
2163
|
-
SpQ(self)->field = estrdup(SpQ(clause)->field);
|
2158
|
+
SpQ(self)->field = SpQ(clause)->field;
|
2164
2159
|
}
|
2165
|
-
else if (
|
2160
|
+
else if (SpQ(self)->field != SpQ(clause)->field) {
|
2166
2161
|
RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
|
2167
2162
|
"Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
|
2168
|
-
"with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
|
2163
|
+
"with field \"%s\"", S(SpQ(clause)->field), S(SpQ(self)->field));
|
2169
2164
|
}
|
2170
2165
|
if (curr_index >= SpNQ(self)->c_capa) {
|
2171
2166
|
SpNQ(self)->c_capa <<= 1;
|
@@ -2187,7 +2182,7 @@ Query *spannq_add_clause(Query *self, Query *clause)
|
|
2187
2182
|
*
|
2188
2183
|
*****************************************************************************/
|
2189
2184
|
|
2190
|
-
static char *spanxq_to_s(Query *self,
|
2185
|
+
static char *spanxq_to_s(Query *self, Symbol field)
|
2191
2186
|
{
|
2192
2187
|
SpanNotQuery *sxq = SpXQ(self);
|
2193
2188
|
char *inc_s = sxq->inc->to_s(sxq->inc, field);
|
@@ -2237,8 +2232,6 @@ static void spanxq_destroy(Query *self)
|
|
2237
2232
|
q_deref(sxq->inc);
|
2238
2233
|
q_deref(sxq->exc);
|
2239
2234
|
|
2240
|
-
free(SpQ(self)->field);
|
2241
|
-
|
2242
2235
|
spanq_destroy_i(self);
|
2243
2236
|
}
|
2244
2237
|
|
@@ -2261,18 +2254,18 @@ static int spanxq_eq(Query *self, Query *o)
|
|
2261
2254
|
Query *spanxq_new_nr(Query *inc, Query *exc)
|
2262
2255
|
{
|
2263
2256
|
Query *self;
|
2264
|
-
if (
|
2257
|
+
if (SpQ(inc)->field != SpQ(inc)->field) {
|
2265
2258
|
RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
|
2266
2259
|
"Attempted to add a SpanQuery with field \"%s\" along with a "
|
2267
2260
|
"SpanQuery with field \"%s\" to an SpanNotQuery",
|
2268
|
-
SpQ(inc)->field, SpQ(exc)->field);
|
2261
|
+
S(SpQ(inc)->field), S(SpQ(exc)->field));
|
2269
2262
|
}
|
2270
2263
|
self = q_new(SpanNotQuery);
|
2271
2264
|
|
2272
2265
|
SpXQ(self)->inc = inc;
|
2273
2266
|
SpXQ(self)->exc = exc;
|
2274
2267
|
|
2275
|
-
SpQ(self)->field =
|
2268
|
+
SpQ(self)->field = SpQ(inc)->field;
|
2276
2269
|
SpQ(self)->get_spans = &spanxe_new;
|
2277
2270
|
SpQ(self)->get_terms = &spanxq_get_terms;
|
2278
2271
|
|
@@ -2311,23 +2304,21 @@ Query *spanxq_new(Query *inc, Query *exc)
|
|
2311
2304
|
|
2312
2305
|
#define SpPfxQ(query) ((SpanPrefixQuery *)(query))
|
2313
2306
|
|
2314
|
-
static char *spanprq_to_s(Query *self,
|
2307
|
+
static char *spanprq_to_s(Query *self, Symbol default_field)
|
2315
2308
|
{
|
2316
2309
|
char *buffer, *bptr;
|
2317
2310
|
const char *prefix = SpPfxQ(self)->prefix;
|
2318
|
-
|
2311
|
+
Symbol field = SpQ(self)->field;
|
2319
2312
|
size_t plen = strlen(prefix);
|
2320
|
-
size_t flen =
|
2313
|
+
size_t flen = sym_len(field);
|
2321
2314
|
|
2322
2315
|
bptr = buffer = ALLOC_N(char, plen + flen + 35);
|
2323
2316
|
|
2324
|
-
if (
|
2325
|
-
sprintf(bptr, "%s:", field);
|
2326
|
-
bptr += flen + 1;
|
2317
|
+
if (default_field == NULL || (field != default_field)) {
|
2318
|
+
bptr += sprintf(bptr, "%s:", S(field));
|
2327
2319
|
}
|
2328
2320
|
|
2329
|
-
sprintf(bptr, "%s*", prefix);
|
2330
|
-
bptr += plen + 1;
|
2321
|
+
bptr += sprintf(bptr, "%s*", prefix);
|
2331
2322
|
if (self->boost != 1.0) {
|
2332
2323
|
*bptr = '^';
|
2333
2324
|
dbl_to_s(++bptr, self->boost);
|
@@ -2338,9 +2329,8 @@ static char *spanprq_to_s(Query *self, const char *current_field)
|
|
2338
2329
|
|
2339
2330
|
static Query *spanprq_rewrite(Query *self, IndexReader *ir)
|
2340
2331
|
{
|
2341
|
-
const
|
2342
|
-
|
2343
|
-
Query *volatile q = spanmtq_new_conf(field, SpPfxQ(self)->max_terms);
|
2332
|
+
const int field_num = fis_get_field_num(ir->fis, SpQ(self)->field);
|
2333
|
+
Query *volatile q = spanmtq_new_conf(SpQ(self)->field, SpPfxQ(self)->max_terms);
|
2344
2334
|
q->boost = self->boost; /* set the boost */
|
2345
2335
|
|
2346
2336
|
if (field_num >= 0) {
|
@@ -2350,7 +2340,7 @@ static Query *spanprq_rewrite(Query *self, IndexReader *ir)
|
|
2350
2340
|
size_t prefix_len = strlen(prefix);
|
2351
2341
|
|
2352
2342
|
TRY
|
2353
|
-
do {
|
2343
|
+
do {
|
2354
2344
|
if (strncmp(term, prefix, prefix_len) != 0) {
|
2355
2345
|
break;
|
2356
2346
|
}
|
@@ -2366,27 +2356,26 @@ static Query *spanprq_rewrite(Query *self, IndexReader *ir)
|
|
2366
2356
|
|
2367
2357
|
static void spanprq_destroy(Query *self)
|
2368
2358
|
{
|
2369
|
-
free(SpQ(self)->field);
|
2370
2359
|
free(SpPfxQ(self)->prefix);
|
2371
2360
|
spanq_destroy_i(self);
|
2372
2361
|
}
|
2373
2362
|
|
2374
2363
|
static unsigned long spanprq_hash(Query *self)
|
2375
2364
|
{
|
2376
|
-
return
|
2365
|
+
return sym_hash(SpQ(self)->field) ^ str_hash(SpPfxQ(self)->prefix);
|
2377
2366
|
}
|
2378
2367
|
|
2379
2368
|
static int spanprq_eq(Query *self, Query *o)
|
2380
2369
|
{
|
2381
|
-
return (strcmp(SpPfxQ(self)->prefix, SpPfxQ(o)->prefix) == 0)
|
2382
|
-
&& (
|
2370
|
+
return (strcmp(SpPfxQ(self)->prefix, SpPfxQ(o)->prefix) == 0)
|
2371
|
+
&& (SpQ(self)->field == SpQ(o)->field);
|
2383
2372
|
}
|
2384
2373
|
|
2385
|
-
Query *spanprq_new(
|
2374
|
+
Query *spanprq_new(Symbol field, const char *prefix)
|
2386
2375
|
{
|
2387
2376
|
Query *self = q_new(SpanPrefixQuery);
|
2388
2377
|
|
2389
|
-
SpQ(self)->field =
|
2378
|
+
SpQ(self)->field = field;
|
2390
2379
|
SpPfxQ(self)->prefix = estrdup(prefix);
|
2391
2380
|
SpPfxQ(self)->max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
|
2392
2381
|
|