ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_span.c
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
-
|
5
4
|
/*****************************************************************************
|
6
5
|
*
|
7
6
|
* NearSpanEnum
|
@@ -16,11 +15,24 @@
|
|
16
15
|
|
17
16
|
Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
18
17
|
{
|
18
|
+
Explanation *expl;
|
19
|
+
Explanation *idf_expl1;
|
20
|
+
Explanation *idf_expl2;
|
21
|
+
Explanation *query_expl;
|
22
|
+
Explanation *qnorm_expl;
|
23
|
+
Explanation *field_expl;
|
24
|
+
Explanation *tf_expl;
|
25
|
+
Scorer *scorer;
|
26
|
+
uchar *field_norms;
|
27
|
+
float field_norm;
|
28
|
+
Explanation *field_norm_expl;
|
29
|
+
|
19
30
|
char *query_str = self->query->to_s(self->query, "");
|
20
|
-
|
31
|
+
HashSet *terms = (HashSet *)self->data;
|
21
32
|
char *field = ((SpanQuery *)self->query->data)->field;
|
22
33
|
char *doc_freqs = NULL;
|
23
|
-
|
34
|
+
size_t df_i = 0;
|
35
|
+
int i;
|
24
36
|
Term *t;
|
25
37
|
|
26
38
|
|
@@ -38,19 +50,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
38
50
|
doc_freqs = "";
|
39
51
|
}
|
40
52
|
|
41
|
-
|
53
|
+
expl = expl_create(0.0,
|
42
54
|
strfmt("weight(%s in %d), product of:", query_str, target));
|
43
55
|
|
44
56
|
/* We need two of these as it's included in both the query explanation
|
45
57
|
* and the field explanation */
|
46
|
-
|
58
|
+
idf_expl1 = expl_create(self->idf,
|
47
59
|
strfmt("idf(%s: %s)", field, doc_freqs));
|
48
|
-
|
60
|
+
idf_expl2 = expl_create(self->idf,
|
49
61
|
strfmt("idf(%s: %s)", field, doc_freqs));
|
50
|
-
if (terms->size > 0)
|
62
|
+
if (terms->size > 0) {
|
63
|
+
free(doc_freqs); /* only free if allocated */
|
64
|
+
}
|
51
65
|
|
52
66
|
/* explain query weight */
|
53
|
-
|
67
|
+
query_expl = expl_create(0.0,
|
54
68
|
strfmt("query_weight(%s), product of:", query_str));
|
55
69
|
|
56
70
|
if (self->query->boost != 1.0) {
|
@@ -59,7 +73,7 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
59
73
|
|
60
74
|
expl_add_detail(query_expl, idf_expl1);
|
61
75
|
|
62
|
-
|
76
|
+
qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
|
63
77
|
expl_add_detail(query_expl, qnorm_expl);
|
64
78
|
|
65
79
|
query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
|
@@ -67,19 +81,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
67
81
|
expl_add_detail(expl, query_expl);
|
68
82
|
|
69
83
|
/* explain field weight */
|
70
|
-
|
84
|
+
field_expl = expl_create(0.0,
|
71
85
|
strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
|
72
86
|
free(query_str);
|
73
87
|
|
74
|
-
|
75
|
-
|
88
|
+
scorer = self->scorer(self, ir);
|
89
|
+
tf_expl = scorer->explain(scorer, target);
|
76
90
|
scorer->destroy(scorer);
|
77
91
|
expl_add_detail(field_expl, tf_expl);
|
78
92
|
expl_add_detail(field_expl, idf_expl2);
|
79
93
|
|
80
|
-
|
81
|
-
|
82
|
-
|
94
|
+
field_norms = ir->get_norms(ir, field);
|
95
|
+
field_norm = (field_norms
|
96
|
+
? sim_decode_norm(self->similarity, field_norms[target])
|
97
|
+
: (float)0.0);
|
98
|
+
field_norm_expl = expl_create(field_norm,
|
83
99
|
strfmt("field_norm(field=%s, doc=%d)", field, target));
|
84
100
|
expl_add_detail(field_expl, field_norm_expl);
|
85
101
|
|
@@ -101,22 +117,19 @@ char *spanw_to_s(Weight *self)
|
|
101
117
|
return strfmt("SpanWeight(%f)", self->value);
|
102
118
|
}
|
103
119
|
|
104
|
-
void spanw_destroy(
|
120
|
+
void spanw_destroy(Weight *self)
|
105
121
|
{
|
106
|
-
|
107
|
-
|
108
|
-
free(p);
|
122
|
+
hs_destroy_all(self->data);
|
123
|
+
w_destroy(self);
|
109
124
|
}
|
110
125
|
|
111
126
|
Weight *spanw_create(Query *query, Searcher *searcher)
|
112
127
|
{
|
113
|
-
Weight *self =
|
128
|
+
Weight *self = w_create(query);
|
114
129
|
SpanQuery *spanq = (SpanQuery *)query->data;
|
115
|
-
|
116
|
-
|
117
|
-
self->
|
118
|
-
self->get_value = &w_get_value;
|
119
|
-
self->normalize = &w_normalize;
|
130
|
+
HashSet *terms = spanq->get_terms(query);
|
131
|
+
|
132
|
+
self->data = terms;
|
120
133
|
self->scorer = &spansc_create;
|
121
134
|
self->explain = &spanw_explain;
|
122
135
|
self->to_s = &spanw_to_s;
|
@@ -125,10 +138,8 @@ Weight *spanw_create(Query *query, Searcher *searcher)
|
|
125
138
|
|
126
139
|
self->similarity = query->get_similarity(query, searcher);
|
127
140
|
|
128
|
-
self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
|
129
|
-
|
130
|
-
self->value = 0.0;
|
131
|
-
self->data = terms;
|
141
|
+
self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
|
142
|
+
terms->size, searcher);
|
132
143
|
|
133
144
|
return self;
|
134
145
|
}
|
@@ -204,7 +215,8 @@ char *spante_to_s(SpanEnum *self)
|
|
204
215
|
char *field = ((SpanQuery *)self->query->data)->field;
|
205
216
|
char *query_str = self->query->to_s(self->query, field);
|
206
217
|
char pos_str[20];
|
207
|
-
|
218
|
+
size_t len = strlen(query_str);
|
219
|
+
int pos;
|
208
220
|
char *str = ALLOC_N(char, len + 40);
|
209
221
|
|
210
222
|
if (self->doc(self) < 0) {
|
@@ -222,9 +234,8 @@ char *spante_to_s(SpanEnum *self)
|
|
222
234
|
return str;
|
223
235
|
}
|
224
236
|
|
225
|
-
void spante_destroy(
|
237
|
+
void spante_destroy(SpanEnum *self)
|
226
238
|
{
|
227
|
-
SpanEnum *self = (SpanEnum *)p;
|
228
239
|
SpanTermEnum *ste = (SpanTermEnum *)self->data;
|
229
240
|
TermDocEnum *tde = ste->positions;
|
230
241
|
tde->close(tde);
|
@@ -315,9 +326,8 @@ char *spanfe_to_s(SpanEnum *self)
|
|
315
326
|
return res;
|
316
327
|
}
|
317
328
|
|
318
|
-
void spanfe_destroy(
|
329
|
+
void spanfe_destroy(SpanEnum *self)
|
319
330
|
{
|
320
|
-
SpanEnum *self = (SpanEnum *)p;
|
321
331
|
SpanEnum *se = (SpanEnum *)self->data;
|
322
332
|
se->destroy(se);
|
323
333
|
free(self);
|
@@ -449,7 +459,7 @@ char *spanoe_to_s(SpanEnum *self)
|
|
449
459
|
char *field = ((SpanQuery *)self->query->data)->field;
|
450
460
|
char *query_str = self->query->to_s(self->query, field);
|
451
461
|
char doc_str[62];
|
452
|
-
|
462
|
+
size_t len = strlen(query_str);
|
453
463
|
char *str = ALLOC_N(char, len + 80);
|
454
464
|
|
455
465
|
if (soe->first_time) {
|
@@ -467,9 +477,9 @@ char *spanoe_to_s(SpanEnum *self)
|
|
467
477
|
return str;
|
468
478
|
}
|
469
479
|
|
470
|
-
void spanoe_destroy(
|
480
|
+
void spanoe_destroy(SpanEnum *self)
|
471
481
|
{
|
472
|
-
SpanEnum *
|
482
|
+
SpanEnum *se;
|
473
483
|
SpanOrEnum *soe = (SpanOrEnum *)self->data;
|
474
484
|
int i;
|
475
485
|
pq_destroy(soe->queue);
|
@@ -730,7 +740,7 @@ char *spanne_to_s(SpanEnum *self)
|
|
730
740
|
char *field = ((SpanQuery *)self->query->data)->field;
|
731
741
|
char *query_str = self->query->to_s(self->query, field);
|
732
742
|
char doc_str[62];
|
733
|
-
|
743
|
+
size_t len = strlen(query_str);
|
734
744
|
char *str = ALLOC_N(char, len + 80);
|
735
745
|
|
736
746
|
if (sne->first_time) {
|
@@ -744,9 +754,9 @@ char *spanne_to_s(SpanEnum *self)
|
|
744
754
|
return str;
|
745
755
|
}
|
746
756
|
|
747
|
-
void spanne_destroy(
|
757
|
+
void spanne_destroy(SpanEnum *self)
|
748
758
|
{
|
749
|
-
SpanEnum *
|
759
|
+
SpanEnum *se;
|
750
760
|
SpanNearEnum *sne = (SpanNearEnum *)self->data;
|
751
761
|
int i;
|
752
762
|
for (i = 0; i < sne->s_cnt; i++) {
|
@@ -886,9 +896,8 @@ char *spanxe_to_s(SpanEnum *self)
|
|
886
896
|
return res;
|
887
897
|
}
|
888
898
|
|
889
|
-
void spanxe_destroy(
|
899
|
+
void spanxe_destroy(SpanEnum *self)
|
890
900
|
{
|
891
|
-
SpanEnum *self = (SpanEnum *)p;
|
892
901
|
SpanNotEnum *sxe = (SpanNotEnum *)self->data;
|
893
902
|
sxe->inc->destroy(sxe->inc);
|
894
903
|
sxe->exc->destroy(sxe->exc);
|
@@ -924,12 +933,11 @@ SpanEnum *spanxe_create(Query *query, IndexReader *ir)
|
|
924
933
|
*
|
925
934
|
*****************************************************************************/
|
926
935
|
|
927
|
-
void spanq_destroy(
|
936
|
+
void spanq_destroy(Query *self)
|
928
937
|
{
|
929
|
-
Query *self = (Query *)p;
|
930
938
|
SpanQuery *sq = (SpanQuery *)self->data;
|
931
939
|
free(sq);
|
932
|
-
|
940
|
+
q_destroy_i(self);
|
933
941
|
}
|
934
942
|
|
935
943
|
/*****************************************************************************
|
@@ -952,48 +960,60 @@ char *spantq_to_s(Query *self, char *field)
|
|
952
960
|
return res;
|
953
961
|
}
|
954
962
|
|
955
|
-
void spantq_destroy(
|
963
|
+
static void spantq_destroy(Query *self)
|
956
964
|
{
|
957
|
-
Query *self = (Query *)p;
|
958
965
|
SpanQuery *sq = (SpanQuery *)self->data;
|
959
966
|
if (self->destroy_all) {
|
960
967
|
Term *term = (Term *)sq->data;
|
961
968
|
term_destroy(term);
|
962
969
|
}
|
963
970
|
free(sq);
|
964
|
-
|
971
|
+
q_destroy_i(self);
|
965
972
|
}
|
966
973
|
|
967
|
-
void spantq_extract_terms(Query *self,
|
974
|
+
static void spantq_extract_terms(Query *self, HashSet *terms)
|
968
975
|
{
|
969
976
|
Term *term = (Term *)((SpanQuery *)self->data)->data;
|
970
|
-
|
977
|
+
hs_add(terms, term_clone(term));
|
971
978
|
}
|
972
979
|
|
973
|
-
|
980
|
+
static HashSet *spantq_get_terms(Query *self)
|
974
981
|
{
|
975
982
|
Term *term = (Term *)((SpanQuery *)self->data)->data;
|
976
|
-
|
977
|
-
|
983
|
+
HashSet *terms = term_set_create();
|
984
|
+
hs_add(terms, term_clone(term));
|
978
985
|
return terms;
|
979
986
|
}
|
980
987
|
|
988
|
+
static uint spantq_hash(Query *self)
|
989
|
+
{
|
990
|
+
return term_hash((Term *)((SpanQuery *)self->data)->data);
|
991
|
+
}
|
992
|
+
|
993
|
+
static int spantq_eq(Query *self, Query *o)
|
994
|
+
{
|
995
|
+
return term_eq((Term *)((SpanQuery *)self->data)->data,
|
996
|
+
(Term *)((SpanQuery *)o->data)->data);
|
997
|
+
}
|
998
|
+
|
981
999
|
Query *spantq_create(Term *term)
|
982
1000
|
{
|
983
1001
|
Query *self = q_create();
|
1002
|
+
|
984
1003
|
SpanQuery *sq = ALLOC(SpanQuery);
|
985
1004
|
sq->data = term;
|
986
|
-
|
987
1005
|
sq->get_spans = &spante_create;
|
988
1006
|
sq->get_terms = &spantq_get_terms;
|
989
1007
|
sq->field = term->field;
|
1008
|
+
self->data = sq;
|
990
1009
|
|
991
1010
|
self->type = SPAN_TERM_QUERY;
|
992
|
-
self->data = sq;
|
993
|
-
self->create_weight = &spanw_create;
|
994
1011
|
self->extract_terms = &spantq_extract_terms;
|
995
1012
|
self->to_s = &spantq_to_s;
|
996
|
-
self->
|
1013
|
+
self->hash = &spantq_hash;
|
1014
|
+
self->eq = &spantq_eq;
|
1015
|
+
self->destroy_i = &spantq_destroy;
|
1016
|
+
self->create_weight_i = &spanw_create;
|
997
1017
|
return self;
|
998
1018
|
}
|
999
1019
|
|
@@ -1013,13 +1033,13 @@ char *spanfq_to_s(Query *self, char *field)
|
|
1013
1033
|
return res;
|
1014
1034
|
}
|
1015
1035
|
|
1016
|
-
void spanfq_extract_terms(Query *self,
|
1036
|
+
void spanfq_extract_terms(Query *self, HashSet *terms)
|
1017
1037
|
{
|
1018
1038
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1019
1039
|
sfq->match->extract_terms(sfq->match, terms);
|
1020
1040
|
}
|
1021
1041
|
|
1022
|
-
|
1042
|
+
HashSet *spanfq_get_terms(Query *self)
|
1023
1043
|
{
|
1024
1044
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1025
1045
|
SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
|
@@ -1029,36 +1049,46 @@ Array *spanfq_get_terms(Query *self)
|
|
1029
1049
|
Query *spanfq_rewrite(Query *self, IndexReader *ir)
|
1030
1050
|
{
|
1031
1051
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1032
|
-
Query *
|
1033
|
-
Query *rewritten = sfq->match->rewrite(sfq->match, ir);
|
1034
|
-
if (rewritten != sfq->match) {
|
1035
|
-
clone = spanfq_create(rewritten, sfq->end);
|
1036
|
-
sfq->match->rewritten = NULL; /* it will get destroyed with the clone */
|
1037
|
-
}
|
1052
|
+
Query *q, *rq;
|
1038
1053
|
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1054
|
+
q = sfq->match;
|
1055
|
+
rq = q->rewrite(q, ir);
|
1056
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1057
|
+
sfq->match = rq;
|
1058
|
+
|
1059
|
+
self->ref_cnt++;
|
1060
|
+
return self; /* no clauses rewrote */
|
1045
1061
|
}
|
1046
1062
|
|
1047
|
-
void spanfq_destroy(
|
1063
|
+
void spanfq_destroy(Query *self)
|
1048
1064
|
{
|
1049
|
-
Query *self = (Query *)p;
|
1050
1065
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1051
1066
|
SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
|
1052
|
-
if (self->destroy_all)
|
1067
|
+
if (self->destroy_all) q_deref(sfq->match);
|
1053
1068
|
free(sfq);
|
1054
1069
|
free(sq);
|
1055
|
-
|
1070
|
+
q_destroy_i(self);
|
1071
|
+
}
|
1072
|
+
|
1073
|
+
static uint spanfq_hash(Query *self)
|
1074
|
+
{
|
1075
|
+
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1076
|
+
return sfq->match->hash(sfq->match) ^ sfq->end;
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
static int spanfq_eq(Query *self, Query *o)
|
1080
|
+
{
|
1081
|
+
SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1082
|
+
SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
|
1083
|
+
return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
|
1056
1084
|
}
|
1057
1085
|
|
1058
1086
|
Query *spanfq_create(Query *match, int end)
|
1059
1087
|
{
|
1060
1088
|
Query *self = q_create();
|
1089
|
+
|
1061
1090
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1091
|
+
|
1062
1092
|
SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
|
1063
1093
|
sfq->match = match;
|
1064
1094
|
sfq->end = end;
|
@@ -1067,14 +1097,16 @@ Query *spanfq_create(Query *match, int end)
|
|
1067
1097
|
sq->get_spans = &spanfe_create;
|
1068
1098
|
sq->get_terms = &spanfq_get_terms;
|
1069
1099
|
sq->field = ((SpanQuery *)match->data)->field;
|
1100
|
+
self->data = sq;
|
1070
1101
|
|
1071
1102
|
self->type = SPAN_FIRST_QUERY;
|
1072
|
-
self->data = sq;
|
1073
|
-
self->create_weight = &spanw_create;
|
1074
|
-
self->extract_terms = &spanfq_extract_terms;
|
1075
1103
|
self->rewrite = &spanfq_rewrite;
|
1104
|
+
self->extract_terms = &spanfq_extract_terms;
|
1076
1105
|
self->to_s = &spanfq_to_s;
|
1077
|
-
self->
|
1106
|
+
self->hash = &spanfq_hash;
|
1107
|
+
self->eq = &spanfq_eq;
|
1108
|
+
self->destroy_i = &spanfq_destroy;
|
1109
|
+
self->create_weight_i = &spanw_create;
|
1078
1110
|
return self;
|
1079
1111
|
}
|
1080
1112
|
|
@@ -1103,7 +1135,7 @@ char *spanoq_to_s(Query *self, char *field)
|
|
1103
1135
|
return res;
|
1104
1136
|
}
|
1105
1137
|
|
1106
|
-
void spanoq_extract_terms(Query *self,
|
1138
|
+
void spanoq_extract_terms(Query *self, HashSet *terms)
|
1107
1139
|
{
|
1108
1140
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1109
1141
|
Query *clause;
|
@@ -1114,10 +1146,10 @@ void spanoq_extract_terms(Query *self, Array *terms)
|
|
1114
1146
|
}
|
1115
1147
|
}
|
1116
1148
|
|
1117
|
-
|
1149
|
+
HashSet *spanoq_get_terms(Query *self)
|
1118
1150
|
{
|
1119
1151
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1120
|
-
|
1152
|
+
HashSet *terms = term_set_create();
|
1121
1153
|
Query *clause;
|
1122
1154
|
int i;
|
1123
1155
|
for (i = 0; i < soq->c_cnt; i++) {
|
@@ -1143,34 +1175,22 @@ SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
|
|
1143
1175
|
Query *spanoq_rewrite(Query *self, IndexReader *ir)
|
1144
1176
|
{
|
1145
1177
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1146
|
-
Query *clone = NULL;
|
1147
|
-
|
1148
1178
|
Query *clause, *rewritten;
|
1149
|
-
Query **new_clauses = ALLOC_N(Query *, soq->c_cnt);
|
1150
1179
|
int i;
|
1180
|
+
/* replace clauses with their rewritten queries */
|
1151
1181
|
for (i = 0; i < soq->c_cnt; i++) {
|
1152
1182
|
clause = soq->clauses[i];
|
1153
1183
|
rewritten = clause->rewrite(clause, ir);
|
1154
|
-
if ((clause
|
1155
|
-
|
1156
|
-
/* The sub-clauses will be handled by the original query */
|
1157
|
-
clone->destroy_all = false;
|
1158
|
-
}
|
1159
|
-
new_clauses[i] = rewritten;
|
1184
|
+
if ((rewritten == clause) || self->destroy_all) q_deref(clause);
|
1185
|
+
soq->clauses[i] = rewritten;
|
1160
1186
|
}
|
1161
1187
|
|
1162
|
-
|
1163
|
-
|
1164
|
-
return self->rewritten = clone; /* some clauses rewrote */
|
1165
|
-
} else {
|
1166
|
-
free(new_clauses); /* no clauses rewrote */
|
1167
|
-
return self;
|
1168
|
-
}
|
1188
|
+
self->ref_cnt++;
|
1189
|
+
return self;
|
1169
1190
|
}
|
1170
1191
|
|
1171
|
-
void spanoq_destroy(
|
1192
|
+
void spanoq_destroy(Query *self)
|
1172
1193
|
{
|
1173
|
-
Query *self = (Query *)p;
|
1174
1194
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1175
1195
|
SpanOrQuery *soq = (SpanOrQuery *)sq->data;
|
1176
1196
|
|
@@ -1179,7 +1199,7 @@ void spanoq_destroy(void *p)
|
|
1179
1199
|
int i;
|
1180
1200
|
for (i = 0; i < soq->c_cnt; i++) {
|
1181
1201
|
clause = soq->clauses[i];
|
1182
|
-
|
1202
|
+
q_deref(clause);
|
1183
1203
|
}
|
1184
1204
|
free(soq->clauses);
|
1185
1205
|
}
|
@@ -1187,13 +1207,44 @@ void spanoq_destroy(void *p)
|
|
1187
1207
|
|
1188
1208
|
free(soq);
|
1189
1209
|
free(sq);
|
1190
|
-
|
1210
|
+
q_destroy_i(self);
|
1211
|
+
}
|
1212
|
+
|
1213
|
+
static uint spanoq_hash(Query *self)
|
1214
|
+
{
|
1215
|
+
int i;
|
1216
|
+
uint hash = 0;
|
1217
|
+
Query *q;
|
1218
|
+
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1219
|
+
|
1220
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1221
|
+
q = soq->clauses[i];
|
1222
|
+
hash ^= q->hash(q);
|
1223
|
+
}
|
1224
|
+
return hash;
|
1225
|
+
}
|
1226
|
+
|
1227
|
+
static int spanoq_eq(Query *self, Query *o)
|
1228
|
+
{
|
1229
|
+
int i;
|
1230
|
+
Query *q1, *q2;
|
1231
|
+
SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1232
|
+
SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
|
1233
|
+
if (soq1->c_cnt != soq2->c_cnt) return false;
|
1234
|
+
for (i = 0; i < soq1->c_cnt; i++) {
|
1235
|
+
q1 = soq1->clauses[i];
|
1236
|
+
q2 = soq2->clauses[i];
|
1237
|
+
if (!q1->eq(q1, q2)) return false;
|
1238
|
+
}
|
1239
|
+
return true;
|
1191
1240
|
}
|
1192
1241
|
|
1193
1242
|
Query *spanoq_create(Query **clauses, int c_cnt)
|
1194
1243
|
{
|
1195
1244
|
Query *self = q_create();
|
1245
|
+
|
1196
1246
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1247
|
+
|
1197
1248
|
SpanOrQuery *soq = ALLOC(SpanOrQuery);
|
1198
1249
|
soq->clauses = clauses;
|
1199
1250
|
soq->c_cnt = c_cnt;
|
@@ -1202,14 +1253,16 @@ Query *spanoq_create(Query **clauses, int c_cnt)
|
|
1202
1253
|
sq->get_spans = &spanoq_get_spans;
|
1203
1254
|
sq->get_terms = &spanoq_get_terms;
|
1204
1255
|
sq->field = ((SpanQuery *)clauses[0]->data)->field;
|
1256
|
+
self->data = sq;
|
1205
1257
|
|
1206
1258
|
self->type = SPAN_OR_QUERY;
|
1207
|
-
self->data = sq;
|
1208
|
-
self->create_weight = &spanw_create;
|
1209
|
-
self->extract_terms = &spanoq_extract_terms;
|
1210
1259
|
self->rewrite = &spanoq_rewrite;
|
1260
|
+
self->extract_terms = &spanoq_extract_terms;
|
1211
1261
|
self->to_s = &spanoq_to_s;
|
1212
|
-
self->
|
1262
|
+
self->hash = &spanoq_hash;
|
1263
|
+
self->eq = &spanoq_eq;
|
1264
|
+
self->destroy_i = &spanoq_destroy;
|
1265
|
+
self->create_weight_i = &spanw_create;
|
1213
1266
|
return self;
|
1214
1267
|
}
|
1215
1268
|
|
@@ -1240,7 +1293,7 @@ char *spannq_to_s(Query *self, char *field)
|
|
1240
1293
|
return res;
|
1241
1294
|
}
|
1242
1295
|
|
1243
|
-
void spannq_extract_terms(Query *self,
|
1296
|
+
void spannq_extract_terms(Query *self, HashSet *terms)
|
1244
1297
|
{
|
1245
1298
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1246
1299
|
Query *clause;
|
@@ -1251,10 +1304,10 @@ void spannq_extract_terms(Query *self, Array *terms)
|
|
1251
1304
|
}
|
1252
1305
|
}
|
1253
1306
|
|
1254
|
-
|
1307
|
+
HashSet *spannq_get_terms(Query *self)
|
1255
1308
|
{
|
1256
1309
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1257
|
-
|
1310
|
+
HashSet *terms = term_set_create();
|
1258
1311
|
Query *clause;
|
1259
1312
|
int i;
|
1260
1313
|
for (i = 0; i < snq->c_cnt; i++) {
|
@@ -1281,34 +1334,21 @@ SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
|
|
1281
1334
|
Query *spannq_rewrite(Query *self, IndexReader *ir)
|
1282
1335
|
{
|
1283
1336
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1284
|
-
Query *clone = NULL;
|
1285
|
-
|
1286
1337
|
Query *clause, *rewritten;
|
1287
|
-
Query **new_clauses = ALLOC_N(Query *, snq->c_cnt);
|
1288
1338
|
int i;
|
1289
1339
|
for (i = 0; i < snq->c_cnt; i++) {
|
1290
1340
|
clause = snq->clauses[i];
|
1291
1341
|
rewritten = clause->rewrite(clause, ir);
|
1292
|
-
if ((clause
|
1293
|
-
|
1294
|
-
/* The sub-clauses will be handled by the original query */
|
1295
|
-
clone->destroy_all = false;
|
1296
|
-
}
|
1297
|
-
new_clauses[i] = rewritten;
|
1342
|
+
if ((rewritten == clause) || self->destroy_all) q_deref(clause);
|
1343
|
+
snq->clauses[i] = rewritten;
|
1298
1344
|
}
|
1299
1345
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
return self->rewritten = clone; /* some clauses rewrote */
|
1303
|
-
} else {
|
1304
|
-
free(new_clauses); /* no clauses rewrote */
|
1305
|
-
return self;
|
1306
|
-
}
|
1346
|
+
self->ref_cnt++;
|
1347
|
+
return self;
|
1307
1348
|
}
|
1308
1349
|
|
1309
|
-
void spannq_destroy(
|
1350
|
+
void spannq_destroy(Query *self)
|
1310
1351
|
{
|
1311
|
-
Query *self = (Query *)p;
|
1312
1352
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1313
1353
|
SpanNearQuery *snq = (SpanNearQuery *)sq->data;
|
1314
1354
|
|
@@ -1317,21 +1357,57 @@ void spannq_destroy(void *p)
|
|
1317
1357
|
int i;
|
1318
1358
|
for (i = 0; i < snq->c_cnt; i++) {
|
1319
1359
|
clause = snq->clauses[i];
|
1320
|
-
|
1360
|
+
q_deref(clause);
|
1321
1361
|
}
|
1322
1362
|
free(snq->clauses);
|
1323
1363
|
}
|
1324
1364
|
|
1325
|
-
|
1326
1365
|
free(snq);
|
1327
1366
|
free(sq);
|
1328
|
-
|
1367
|
+
q_destroy_i(self);
|
1368
|
+
}
|
1369
|
+
|
1370
|
+
static uint spannq_hash(Query *self)
|
1371
|
+
{
|
1372
|
+
int i;
|
1373
|
+
uint hash = 0;
|
1374
|
+
Query *q;
|
1375
|
+
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1376
|
+
|
1377
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1378
|
+
q = snq->clauses[i];
|
1379
|
+
hash ^= q->hash(q);
|
1380
|
+
}
|
1381
|
+
return ((hash ^ snq->slop) << 1) | snq->in_order;
|
1382
|
+
}
|
1383
|
+
|
1384
|
+
static int spannq_eq(Query *self, Query *o)
|
1385
|
+
{
|
1386
|
+
int i;
|
1387
|
+
Query *q1, *q2;
|
1388
|
+
SpanNearQuery *snq1 = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1389
|
+
SpanNearQuery *snq2 = (SpanNearQuery *)((SpanQuery *)o->data)->data;
|
1390
|
+
if (snq1->c_cnt != snq2->c_cnt ||
|
1391
|
+
snq1->slop != snq2->slop ||
|
1392
|
+
snq1->in_order != snq2->in_order) {
|
1393
|
+
return false;
|
1394
|
+
}
|
1395
|
+
|
1396
|
+
for (i = 0; i < snq1->c_cnt; i++) {
|
1397
|
+
q1 = snq1->clauses[i];
|
1398
|
+
q2 = snq2->clauses[i];
|
1399
|
+
if (!q1->eq(q1, q2)) return false;
|
1400
|
+
}
|
1401
|
+
|
1402
|
+
return true;
|
1329
1403
|
}
|
1330
1404
|
|
1331
1405
|
Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
|
1332
1406
|
{
|
1333
1407
|
Query *self = q_create();
|
1408
|
+
|
1334
1409
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1410
|
+
|
1335
1411
|
SpanNearQuery *snq = ALLOC(SpanNearQuery);
|
1336
1412
|
snq->clauses = clauses;
|
1337
1413
|
snq->c_cnt = c_cnt;
|
@@ -1341,16 +1417,17 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
|
|
1341
1417
|
|
1342
1418
|
sq->get_spans = &spannq_get_spans;
|
1343
1419
|
sq->get_terms = &spannq_get_terms;
|
1344
|
-
|
1345
1420
|
sq->field = ((SpanQuery *)clauses[0]->data)->field;
|
1421
|
+
self->data = sq;
|
1346
1422
|
|
1347
1423
|
self->type = SPAN_NEAR_QUERY;
|
1348
|
-
self->data = sq;
|
1349
|
-
self->create_weight = &spanw_create;
|
1350
|
-
self->extract_terms = &spannq_extract_terms;
|
1351
1424
|
self->rewrite = &spannq_rewrite;
|
1425
|
+
self->extract_terms = &spannq_extract_terms;
|
1352
1426
|
self->to_s = &spannq_to_s;
|
1353
|
-
self->
|
1427
|
+
self->hash = &spannq_hash;
|
1428
|
+
self->eq = &spannq_eq;
|
1429
|
+
self->destroy_i = &spannq_destroy;
|
1430
|
+
self->create_weight_i = &spanw_create;
|
1354
1431
|
return self;
|
1355
1432
|
}
|
1356
1433
|
|
@@ -1372,16 +1449,16 @@ char *spanxq_to_s(Query *self, char *field)
|
|
1372
1449
|
return res;
|
1373
1450
|
}
|
1374
1451
|
|
1375
|
-
void spanxq_extract_terms(Query *self,
|
1452
|
+
void spanxq_extract_terms(Query *self, HashSet *terms)
|
1376
1453
|
{
|
1377
1454
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1378
1455
|
sxq->inc->extract_terms(sxq->inc, terms);
|
1379
1456
|
}
|
1380
1457
|
|
1381
|
-
|
1458
|
+
HashSet *spanxq_get_terms(Query *self)
|
1382
1459
|
{
|
1383
1460
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1384
|
-
|
1461
|
+
HashSet *terms = term_set_create();
|
1385
1462
|
sxq->inc->extract_terms(sxq->inc, terms);
|
1386
1463
|
return terms;
|
1387
1464
|
}
|
@@ -1389,48 +1466,60 @@ Array *spanxq_get_terms(Query *self)
|
|
1389
1466
|
Query *spanxq_rewrite(Query *self, IndexReader *ir)
|
1390
1467
|
{
|
1391
1468
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1392
|
-
Query *
|
1393
|
-
Query *inc, *exc;
|
1394
|
-
Query *inc_rewritten, *exc_rewritten;
|
1395
|
-
|
1396
|
-
inc = sxq->inc;
|
1397
|
-
inc_rewritten = inc->rewrite(inc, ir);
|
1398
|
-
exc = sxq->exc;
|
1399
|
-
exc_rewritten = exc->rewrite(exc, ir);
|
1400
|
-
if ((inc_rewritten != inc) || (exc_rewritten != exc)) {
|
1401
|
-
clone = spanxq_create(inc_rewritten, exc_rewritten);
|
1402
|
-
/* The sub-clauses will be handled by the original query */
|
1403
|
-
clone->destroy_all = false;
|
1404
|
-
}
|
1469
|
+
Query *q, *rq;
|
1405
1470
|
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1471
|
+
/* rewrite inclusive query */
|
1472
|
+
q = sxq->inc;
|
1473
|
+
rq = q->rewrite(q, ir);
|
1474
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1475
|
+
sxq->inc = rq;
|
1476
|
+
|
1477
|
+
/* rewrite exclusive query */
|
1478
|
+
q = sxq->exc;
|
1479
|
+
rq = q->rewrite(q, ir);
|
1480
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1481
|
+
sxq->exc = rq;
|
1482
|
+
|
1483
|
+
self->ref_cnt++;
|
1484
|
+
return self;
|
1412
1485
|
}
|
1413
1486
|
|
1414
|
-
void spanxq_destroy(
|
1487
|
+
void spanxq_destroy(Query *self)
|
1415
1488
|
{
|
1416
|
-
Query *self = (Query *)p;
|
1417
1489
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1418
1490
|
SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
|
1419
1491
|
|
1420
1492
|
if (self->destroy_all) {
|
1421
|
-
|
1422
|
-
|
1493
|
+
q_deref(sxq->inc);
|
1494
|
+
q_deref(sxq->exc);
|
1423
1495
|
}
|
1424
1496
|
|
1425
1497
|
free(sxq);
|
1426
1498
|
free(sq);
|
1427
|
-
|
1499
|
+
q_destroy_i(self);
|
1428
1500
|
}
|
1429
1501
|
|
1502
|
+
static uint spanxq_hash(Query *self)
|
1503
|
+
{
|
1504
|
+
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1505
|
+
return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
|
1506
|
+
}
|
1507
|
+
|
1508
|
+
static int spanxq_eq(Query *self, Query *o)
|
1509
|
+
{
|
1510
|
+
SpanNotQuery *sxq1 = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1511
|
+
SpanNotQuery *sxq2 = (SpanNotQuery *)((SpanQuery *)o->data)->data;
|
1512
|
+
return sxq1->inc->eq(sxq1->inc, sxq2->inc) &&
|
1513
|
+
sxq1->exc->eq(sxq1->exc, sxq2->exc);
|
1514
|
+
}
|
1515
|
+
|
1516
|
+
|
1430
1517
|
Query *spanxq_create(Query *inc, Query *exc)
|
1431
1518
|
{
|
1432
1519
|
Query *self = q_create();
|
1520
|
+
|
1433
1521
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1522
|
+
|
1434
1523
|
SpanNotQuery *sxq = ALLOC(SpanNotQuery);
|
1435
1524
|
sxq->inc = inc;
|
1436
1525
|
sxq->exc = exc;
|
@@ -1439,14 +1528,17 @@ Query *spanxq_create(Query *inc, Query *exc)
|
|
1439
1528
|
sq->get_spans = &spanxe_create;
|
1440
1529
|
sq->get_terms = &spanxq_get_terms;
|
1441
1530
|
sq->field = ((SpanQuery *)inc->data)->field;
|
1531
|
+
self->data = sq;
|
1442
1532
|
|
1443
1533
|
self->type = SPAN_NOT_QUERY;
|
1444
|
-
self->data = sq;
|
1445
|
-
self->create_weight = &spanw_create;
|
1446
|
-
self->extract_terms = &spanxq_extract_terms;
|
1447
1534
|
self->rewrite = &spanxq_rewrite;
|
1535
|
+
self->extract_terms = &spanxq_extract_terms;
|
1448
1536
|
self->to_s = &spanxq_to_s;
|
1449
|
-
self->
|
1537
|
+
self->hash = &spanxq_hash;
|
1538
|
+
self->eq = &spanxq_eq;
|
1539
|
+
self->destroy_i = &spanxq_destroy;
|
1540
|
+
self->create_weight_i = &spanw_create;
|
1541
|
+
|
1450
1542
|
return self;
|
1451
1543
|
}
|
1452
1544
|
|
@@ -1512,24 +1604,23 @@ bool spansc_skip_to(Scorer *self, int target)
|
|
1512
1604
|
|
1513
1605
|
Explanation *spansc_explain(Scorer *self, int target)
|
1514
1606
|
{
|
1607
|
+
Explanation *tf_explanation;
|
1515
1608
|
SpanScorer *spansc = (SpanScorer *)self->data;
|
1516
1609
|
float phrase_freq;
|
1517
1610
|
self->skip_to(self, target);
|
1518
|
-
phrase_freq = (self->doc == target) ? spansc->freq : 0.0;
|
1611
|
+
phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
|
1519
1612
|
|
1520
|
-
|
1613
|
+
tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
|
1521
1614
|
strfmt("tf(phrase_freq(%f)", phrase_freq));
|
1522
1615
|
|
1523
1616
|
return tf_explanation;
|
1524
1617
|
}
|
1525
1618
|
|
1526
|
-
void spansc_destroy(
|
1619
|
+
void spansc_destroy(Scorer *self)
|
1527
1620
|
{
|
1528
|
-
Scorer *self = (Scorer *)p;
|
1529
1621
|
SpanScorer *spansc = (SpanScorer *)self->data;
|
1530
1622
|
if (spansc->spans) spansc->spans->destroy(spansc->spans);
|
1531
|
-
|
1532
|
-
scorer_destroy(p);
|
1623
|
+
scorer_destroy_i(self);
|
1533
1624
|
}
|
1534
1625
|
|
1535
1626
|
Scorer *spansc_create(Weight *weight, IndexReader *ir)
|