ferret 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_span.c
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
-
|
5
4
|
/*****************************************************************************
|
6
5
|
*
|
7
6
|
* NearSpanEnum
|
@@ -16,11 +15,24 @@
|
|
16
15
|
|
17
16
|
Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
18
17
|
{
|
18
|
+
Explanation *expl;
|
19
|
+
Explanation *idf_expl1;
|
20
|
+
Explanation *idf_expl2;
|
21
|
+
Explanation *query_expl;
|
22
|
+
Explanation *qnorm_expl;
|
23
|
+
Explanation *field_expl;
|
24
|
+
Explanation *tf_expl;
|
25
|
+
Scorer *scorer;
|
26
|
+
uchar *field_norms;
|
27
|
+
float field_norm;
|
28
|
+
Explanation *field_norm_expl;
|
29
|
+
|
19
30
|
char *query_str = self->query->to_s(self->query, "");
|
20
|
-
|
31
|
+
HashSet *terms = (HashSet *)self->data;
|
21
32
|
char *field = ((SpanQuery *)self->query->data)->field;
|
22
33
|
char *doc_freqs = NULL;
|
23
|
-
|
34
|
+
size_t df_i = 0;
|
35
|
+
int i;
|
24
36
|
Term *t;
|
25
37
|
|
26
38
|
|
@@ -38,19 +50,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
38
50
|
doc_freqs = "";
|
39
51
|
}
|
40
52
|
|
41
|
-
|
53
|
+
expl = expl_create(0.0,
|
42
54
|
strfmt("weight(%s in %d), product of:", query_str, target));
|
43
55
|
|
44
56
|
/* We need two of these as it's included in both the query explanation
|
45
57
|
* and the field explanation */
|
46
|
-
|
58
|
+
idf_expl1 = expl_create(self->idf,
|
47
59
|
strfmt("idf(%s: %s)", field, doc_freqs));
|
48
|
-
|
60
|
+
idf_expl2 = expl_create(self->idf,
|
49
61
|
strfmt("idf(%s: %s)", field, doc_freqs));
|
50
|
-
if (terms->size > 0)
|
62
|
+
if (terms->size > 0) {
|
63
|
+
free(doc_freqs); /* only free if allocated */
|
64
|
+
}
|
51
65
|
|
52
66
|
/* explain query weight */
|
53
|
-
|
67
|
+
query_expl = expl_create(0.0,
|
54
68
|
strfmt("query_weight(%s), product of:", query_str));
|
55
69
|
|
56
70
|
if (self->query->boost != 1.0) {
|
@@ -59,7 +73,7 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
59
73
|
|
60
74
|
expl_add_detail(query_expl, idf_expl1);
|
61
75
|
|
62
|
-
|
76
|
+
qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
|
63
77
|
expl_add_detail(query_expl, qnorm_expl);
|
64
78
|
|
65
79
|
query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
|
@@ -67,19 +81,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
|
67
81
|
expl_add_detail(expl, query_expl);
|
68
82
|
|
69
83
|
/* explain field weight */
|
70
|
-
|
84
|
+
field_expl = expl_create(0.0,
|
71
85
|
strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
|
72
86
|
free(query_str);
|
73
87
|
|
74
|
-
|
75
|
-
|
88
|
+
scorer = self->scorer(self, ir);
|
89
|
+
tf_expl = scorer->explain(scorer, target);
|
76
90
|
scorer->destroy(scorer);
|
77
91
|
expl_add_detail(field_expl, tf_expl);
|
78
92
|
expl_add_detail(field_expl, idf_expl2);
|
79
93
|
|
80
|
-
|
81
|
-
|
82
|
-
|
94
|
+
field_norms = ir->get_norms(ir, field);
|
95
|
+
field_norm = (field_norms
|
96
|
+
? sim_decode_norm(self->similarity, field_norms[target])
|
97
|
+
: (float)0.0);
|
98
|
+
field_norm_expl = expl_create(field_norm,
|
83
99
|
strfmt("field_norm(field=%s, doc=%d)", field, target));
|
84
100
|
expl_add_detail(field_expl, field_norm_expl);
|
85
101
|
|
@@ -101,22 +117,19 @@ char *spanw_to_s(Weight *self)
|
|
101
117
|
return strfmt("SpanWeight(%f)", self->value);
|
102
118
|
}
|
103
119
|
|
104
|
-
void spanw_destroy(
|
120
|
+
void spanw_destroy(Weight *self)
|
105
121
|
{
|
106
|
-
|
107
|
-
|
108
|
-
free(p);
|
122
|
+
hs_destroy_all(self->data);
|
123
|
+
w_destroy(self);
|
109
124
|
}
|
110
125
|
|
111
126
|
Weight *spanw_create(Query *query, Searcher *searcher)
|
112
127
|
{
|
113
|
-
Weight *self =
|
128
|
+
Weight *self = w_create(query);
|
114
129
|
SpanQuery *spanq = (SpanQuery *)query->data;
|
115
|
-
|
116
|
-
|
117
|
-
self->
|
118
|
-
self->get_value = &w_get_value;
|
119
|
-
self->normalize = &w_normalize;
|
130
|
+
HashSet *terms = spanq->get_terms(query);
|
131
|
+
|
132
|
+
self->data = terms;
|
120
133
|
self->scorer = &spansc_create;
|
121
134
|
self->explain = &spanw_explain;
|
122
135
|
self->to_s = &spanw_to_s;
|
@@ -125,10 +138,8 @@ Weight *spanw_create(Query *query, Searcher *searcher)
|
|
125
138
|
|
126
139
|
self->similarity = query->get_similarity(query, searcher);
|
127
140
|
|
128
|
-
self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
|
129
|
-
|
130
|
-
self->value = 0.0;
|
131
|
-
self->data = terms;
|
141
|
+
self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
|
142
|
+
terms->size, searcher);
|
132
143
|
|
133
144
|
return self;
|
134
145
|
}
|
@@ -204,7 +215,8 @@ char *spante_to_s(SpanEnum *self)
|
|
204
215
|
char *field = ((SpanQuery *)self->query->data)->field;
|
205
216
|
char *query_str = self->query->to_s(self->query, field);
|
206
217
|
char pos_str[20];
|
207
|
-
|
218
|
+
size_t len = strlen(query_str);
|
219
|
+
int pos;
|
208
220
|
char *str = ALLOC_N(char, len + 40);
|
209
221
|
|
210
222
|
if (self->doc(self) < 0) {
|
@@ -222,9 +234,8 @@ char *spante_to_s(SpanEnum *self)
|
|
222
234
|
return str;
|
223
235
|
}
|
224
236
|
|
225
|
-
void spante_destroy(
|
237
|
+
void spante_destroy(SpanEnum *self)
|
226
238
|
{
|
227
|
-
SpanEnum *self = (SpanEnum *)p;
|
228
239
|
SpanTermEnum *ste = (SpanTermEnum *)self->data;
|
229
240
|
TermDocEnum *tde = ste->positions;
|
230
241
|
tde->close(tde);
|
@@ -315,9 +326,8 @@ char *spanfe_to_s(SpanEnum *self)
|
|
315
326
|
return res;
|
316
327
|
}
|
317
328
|
|
318
|
-
void spanfe_destroy(
|
329
|
+
void spanfe_destroy(SpanEnum *self)
|
319
330
|
{
|
320
|
-
SpanEnum *self = (SpanEnum *)p;
|
321
331
|
SpanEnum *se = (SpanEnum *)self->data;
|
322
332
|
se->destroy(se);
|
323
333
|
free(self);
|
@@ -449,7 +459,7 @@ char *spanoe_to_s(SpanEnum *self)
|
|
449
459
|
char *field = ((SpanQuery *)self->query->data)->field;
|
450
460
|
char *query_str = self->query->to_s(self->query, field);
|
451
461
|
char doc_str[62];
|
452
|
-
|
462
|
+
size_t len = strlen(query_str);
|
453
463
|
char *str = ALLOC_N(char, len + 80);
|
454
464
|
|
455
465
|
if (soe->first_time) {
|
@@ -467,9 +477,9 @@ char *spanoe_to_s(SpanEnum *self)
|
|
467
477
|
return str;
|
468
478
|
}
|
469
479
|
|
470
|
-
void spanoe_destroy(
|
480
|
+
void spanoe_destroy(SpanEnum *self)
|
471
481
|
{
|
472
|
-
SpanEnum *
|
482
|
+
SpanEnum *se;
|
473
483
|
SpanOrEnum *soe = (SpanOrEnum *)self->data;
|
474
484
|
int i;
|
475
485
|
pq_destroy(soe->queue);
|
@@ -730,7 +740,7 @@ char *spanne_to_s(SpanEnum *self)
|
|
730
740
|
char *field = ((SpanQuery *)self->query->data)->field;
|
731
741
|
char *query_str = self->query->to_s(self->query, field);
|
732
742
|
char doc_str[62];
|
733
|
-
|
743
|
+
size_t len = strlen(query_str);
|
734
744
|
char *str = ALLOC_N(char, len + 80);
|
735
745
|
|
736
746
|
if (sne->first_time) {
|
@@ -744,9 +754,9 @@ char *spanne_to_s(SpanEnum *self)
|
|
744
754
|
return str;
|
745
755
|
}
|
746
756
|
|
747
|
-
void spanne_destroy(
|
757
|
+
void spanne_destroy(SpanEnum *self)
|
748
758
|
{
|
749
|
-
SpanEnum *
|
759
|
+
SpanEnum *se;
|
750
760
|
SpanNearEnum *sne = (SpanNearEnum *)self->data;
|
751
761
|
int i;
|
752
762
|
for (i = 0; i < sne->s_cnt; i++) {
|
@@ -886,9 +896,8 @@ char *spanxe_to_s(SpanEnum *self)
|
|
886
896
|
return res;
|
887
897
|
}
|
888
898
|
|
889
|
-
void spanxe_destroy(
|
899
|
+
void spanxe_destroy(SpanEnum *self)
|
890
900
|
{
|
891
|
-
SpanEnum *self = (SpanEnum *)p;
|
892
901
|
SpanNotEnum *sxe = (SpanNotEnum *)self->data;
|
893
902
|
sxe->inc->destroy(sxe->inc);
|
894
903
|
sxe->exc->destroy(sxe->exc);
|
@@ -924,12 +933,11 @@ SpanEnum *spanxe_create(Query *query, IndexReader *ir)
|
|
924
933
|
*
|
925
934
|
*****************************************************************************/
|
926
935
|
|
927
|
-
void spanq_destroy(
|
936
|
+
void spanq_destroy(Query *self)
|
928
937
|
{
|
929
|
-
Query *self = (Query *)p;
|
930
938
|
SpanQuery *sq = (SpanQuery *)self->data;
|
931
939
|
free(sq);
|
932
|
-
|
940
|
+
q_destroy_i(self);
|
933
941
|
}
|
934
942
|
|
935
943
|
/*****************************************************************************
|
@@ -952,48 +960,60 @@ char *spantq_to_s(Query *self, char *field)
|
|
952
960
|
return res;
|
953
961
|
}
|
954
962
|
|
955
|
-
void spantq_destroy(
|
963
|
+
static void spantq_destroy(Query *self)
|
956
964
|
{
|
957
|
-
Query *self = (Query *)p;
|
958
965
|
SpanQuery *sq = (SpanQuery *)self->data;
|
959
966
|
if (self->destroy_all) {
|
960
967
|
Term *term = (Term *)sq->data;
|
961
968
|
term_destroy(term);
|
962
969
|
}
|
963
970
|
free(sq);
|
964
|
-
|
971
|
+
q_destroy_i(self);
|
965
972
|
}
|
966
973
|
|
967
|
-
void spantq_extract_terms(Query *self,
|
974
|
+
static void spantq_extract_terms(Query *self, HashSet *terms)
|
968
975
|
{
|
969
976
|
Term *term = (Term *)((SpanQuery *)self->data)->data;
|
970
|
-
|
977
|
+
hs_add(terms, term_clone(term));
|
971
978
|
}
|
972
979
|
|
973
|
-
|
980
|
+
static HashSet *spantq_get_terms(Query *self)
|
974
981
|
{
|
975
982
|
Term *term = (Term *)((SpanQuery *)self->data)->data;
|
976
|
-
|
977
|
-
|
983
|
+
HashSet *terms = term_set_create();
|
984
|
+
hs_add(terms, term_clone(term));
|
978
985
|
return terms;
|
979
986
|
}
|
980
987
|
|
988
|
+
static uint spantq_hash(Query *self)
|
989
|
+
{
|
990
|
+
return term_hash((Term *)((SpanQuery *)self->data)->data);
|
991
|
+
}
|
992
|
+
|
993
|
+
static int spantq_eq(Query *self, Query *o)
|
994
|
+
{
|
995
|
+
return term_eq((Term *)((SpanQuery *)self->data)->data,
|
996
|
+
(Term *)((SpanQuery *)o->data)->data);
|
997
|
+
}
|
998
|
+
|
981
999
|
Query *spantq_create(Term *term)
|
982
1000
|
{
|
983
1001
|
Query *self = q_create();
|
1002
|
+
|
984
1003
|
SpanQuery *sq = ALLOC(SpanQuery);
|
985
1004
|
sq->data = term;
|
986
|
-
|
987
1005
|
sq->get_spans = &spante_create;
|
988
1006
|
sq->get_terms = &spantq_get_terms;
|
989
1007
|
sq->field = term->field;
|
1008
|
+
self->data = sq;
|
990
1009
|
|
991
1010
|
self->type = SPAN_TERM_QUERY;
|
992
|
-
self->data = sq;
|
993
|
-
self->create_weight = &spanw_create;
|
994
1011
|
self->extract_terms = &spantq_extract_terms;
|
995
1012
|
self->to_s = &spantq_to_s;
|
996
|
-
self->
|
1013
|
+
self->hash = &spantq_hash;
|
1014
|
+
self->eq = &spantq_eq;
|
1015
|
+
self->destroy_i = &spantq_destroy;
|
1016
|
+
self->create_weight_i = &spanw_create;
|
997
1017
|
return self;
|
998
1018
|
}
|
999
1019
|
|
@@ -1013,13 +1033,13 @@ char *spanfq_to_s(Query *self, char *field)
|
|
1013
1033
|
return res;
|
1014
1034
|
}
|
1015
1035
|
|
1016
|
-
void spanfq_extract_terms(Query *self,
|
1036
|
+
void spanfq_extract_terms(Query *self, HashSet *terms)
|
1017
1037
|
{
|
1018
1038
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1019
1039
|
sfq->match->extract_terms(sfq->match, terms);
|
1020
1040
|
}
|
1021
1041
|
|
1022
|
-
|
1042
|
+
HashSet *spanfq_get_terms(Query *self)
|
1023
1043
|
{
|
1024
1044
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1025
1045
|
SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
|
@@ -1029,36 +1049,46 @@ Array *spanfq_get_terms(Query *self)
|
|
1029
1049
|
Query *spanfq_rewrite(Query *self, IndexReader *ir)
|
1030
1050
|
{
|
1031
1051
|
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1032
|
-
Query *
|
1033
|
-
Query *rewritten = sfq->match->rewrite(sfq->match, ir);
|
1034
|
-
if (rewritten != sfq->match) {
|
1035
|
-
clone = spanfq_create(rewritten, sfq->end);
|
1036
|
-
sfq->match->rewritten = NULL; /* it will get destroyed with the clone */
|
1037
|
-
}
|
1052
|
+
Query *q, *rq;
|
1038
1053
|
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1054
|
+
q = sfq->match;
|
1055
|
+
rq = q->rewrite(q, ir);
|
1056
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1057
|
+
sfq->match = rq;
|
1058
|
+
|
1059
|
+
self->ref_cnt++;
|
1060
|
+
return self; /* no clauses rewrote */
|
1045
1061
|
}
|
1046
1062
|
|
1047
|
-
void spanfq_destroy(
|
1063
|
+
void spanfq_destroy(Query *self)
|
1048
1064
|
{
|
1049
|
-
Query *self = (Query *)p;
|
1050
1065
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1051
1066
|
SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
|
1052
|
-
if (self->destroy_all)
|
1067
|
+
if (self->destroy_all) q_deref(sfq->match);
|
1053
1068
|
free(sfq);
|
1054
1069
|
free(sq);
|
1055
|
-
|
1070
|
+
q_destroy_i(self);
|
1071
|
+
}
|
1072
|
+
|
1073
|
+
static uint spanfq_hash(Query *self)
|
1074
|
+
{
|
1075
|
+
SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1076
|
+
return sfq->match->hash(sfq->match) ^ sfq->end;
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
static int spanfq_eq(Query *self, Query *o)
|
1080
|
+
{
|
1081
|
+
SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
|
1082
|
+
SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
|
1083
|
+
return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
|
1056
1084
|
}
|
1057
1085
|
|
1058
1086
|
Query *spanfq_create(Query *match, int end)
|
1059
1087
|
{
|
1060
1088
|
Query *self = q_create();
|
1089
|
+
|
1061
1090
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1091
|
+
|
1062
1092
|
SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
|
1063
1093
|
sfq->match = match;
|
1064
1094
|
sfq->end = end;
|
@@ -1067,14 +1097,16 @@ Query *spanfq_create(Query *match, int end)
|
|
1067
1097
|
sq->get_spans = &spanfe_create;
|
1068
1098
|
sq->get_terms = &spanfq_get_terms;
|
1069
1099
|
sq->field = ((SpanQuery *)match->data)->field;
|
1100
|
+
self->data = sq;
|
1070
1101
|
|
1071
1102
|
self->type = SPAN_FIRST_QUERY;
|
1072
|
-
self->data = sq;
|
1073
|
-
self->create_weight = &spanw_create;
|
1074
|
-
self->extract_terms = &spanfq_extract_terms;
|
1075
1103
|
self->rewrite = &spanfq_rewrite;
|
1104
|
+
self->extract_terms = &spanfq_extract_terms;
|
1076
1105
|
self->to_s = &spanfq_to_s;
|
1077
|
-
self->
|
1106
|
+
self->hash = &spanfq_hash;
|
1107
|
+
self->eq = &spanfq_eq;
|
1108
|
+
self->destroy_i = &spanfq_destroy;
|
1109
|
+
self->create_weight_i = &spanw_create;
|
1078
1110
|
return self;
|
1079
1111
|
}
|
1080
1112
|
|
@@ -1103,7 +1135,7 @@ char *spanoq_to_s(Query *self, char *field)
|
|
1103
1135
|
return res;
|
1104
1136
|
}
|
1105
1137
|
|
1106
|
-
void spanoq_extract_terms(Query *self,
|
1138
|
+
void spanoq_extract_terms(Query *self, HashSet *terms)
|
1107
1139
|
{
|
1108
1140
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1109
1141
|
Query *clause;
|
@@ -1114,10 +1146,10 @@ void spanoq_extract_terms(Query *self, Array *terms)
|
|
1114
1146
|
}
|
1115
1147
|
}
|
1116
1148
|
|
1117
|
-
|
1149
|
+
HashSet *spanoq_get_terms(Query *self)
|
1118
1150
|
{
|
1119
1151
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1120
|
-
|
1152
|
+
HashSet *terms = term_set_create();
|
1121
1153
|
Query *clause;
|
1122
1154
|
int i;
|
1123
1155
|
for (i = 0; i < soq->c_cnt; i++) {
|
@@ -1143,34 +1175,22 @@ SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
|
|
1143
1175
|
Query *spanoq_rewrite(Query *self, IndexReader *ir)
|
1144
1176
|
{
|
1145
1177
|
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1146
|
-
Query *clone = NULL;
|
1147
|
-
|
1148
1178
|
Query *clause, *rewritten;
|
1149
|
-
Query **new_clauses = ALLOC_N(Query *, soq->c_cnt);
|
1150
1179
|
int i;
|
1180
|
+
/* replace clauses with their rewritten queries */
|
1151
1181
|
for (i = 0; i < soq->c_cnt; i++) {
|
1152
1182
|
clause = soq->clauses[i];
|
1153
1183
|
rewritten = clause->rewrite(clause, ir);
|
1154
|
-
if ((clause
|
1155
|
-
|
1156
|
-
/* The sub-clauses will be handled by the original query */
|
1157
|
-
clone->destroy_all = false;
|
1158
|
-
}
|
1159
|
-
new_clauses[i] = rewritten;
|
1184
|
+
if ((rewritten == clause) || self->destroy_all) q_deref(clause);
|
1185
|
+
soq->clauses[i] = rewritten;
|
1160
1186
|
}
|
1161
1187
|
|
1162
|
-
|
1163
|
-
|
1164
|
-
return self->rewritten = clone; /* some clauses rewrote */
|
1165
|
-
} else {
|
1166
|
-
free(new_clauses); /* no clauses rewrote */
|
1167
|
-
return self;
|
1168
|
-
}
|
1188
|
+
self->ref_cnt++;
|
1189
|
+
return self;
|
1169
1190
|
}
|
1170
1191
|
|
1171
|
-
void spanoq_destroy(
|
1192
|
+
void spanoq_destroy(Query *self)
|
1172
1193
|
{
|
1173
|
-
Query *self = (Query *)p;
|
1174
1194
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1175
1195
|
SpanOrQuery *soq = (SpanOrQuery *)sq->data;
|
1176
1196
|
|
@@ -1179,7 +1199,7 @@ void spanoq_destroy(void *p)
|
|
1179
1199
|
int i;
|
1180
1200
|
for (i = 0; i < soq->c_cnt; i++) {
|
1181
1201
|
clause = soq->clauses[i];
|
1182
|
-
|
1202
|
+
q_deref(clause);
|
1183
1203
|
}
|
1184
1204
|
free(soq->clauses);
|
1185
1205
|
}
|
@@ -1187,13 +1207,44 @@ void spanoq_destroy(void *p)
|
|
1187
1207
|
|
1188
1208
|
free(soq);
|
1189
1209
|
free(sq);
|
1190
|
-
|
1210
|
+
q_destroy_i(self);
|
1211
|
+
}
|
1212
|
+
|
1213
|
+
static uint spanoq_hash(Query *self)
|
1214
|
+
{
|
1215
|
+
int i;
|
1216
|
+
uint hash = 0;
|
1217
|
+
Query *q;
|
1218
|
+
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1219
|
+
|
1220
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1221
|
+
q = soq->clauses[i];
|
1222
|
+
hash ^= q->hash(q);
|
1223
|
+
}
|
1224
|
+
return hash;
|
1225
|
+
}
|
1226
|
+
|
1227
|
+
static int spanoq_eq(Query *self, Query *o)
|
1228
|
+
{
|
1229
|
+
int i;
|
1230
|
+
Query *q1, *q2;
|
1231
|
+
SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1232
|
+
SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
|
1233
|
+
if (soq1->c_cnt != soq2->c_cnt) return false;
|
1234
|
+
for (i = 0; i < soq1->c_cnt; i++) {
|
1235
|
+
q1 = soq1->clauses[i];
|
1236
|
+
q2 = soq2->clauses[i];
|
1237
|
+
if (!q1->eq(q1, q2)) return false;
|
1238
|
+
}
|
1239
|
+
return true;
|
1191
1240
|
}
|
1192
1241
|
|
1193
1242
|
Query *spanoq_create(Query **clauses, int c_cnt)
|
1194
1243
|
{
|
1195
1244
|
Query *self = q_create();
|
1245
|
+
|
1196
1246
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1247
|
+
|
1197
1248
|
SpanOrQuery *soq = ALLOC(SpanOrQuery);
|
1198
1249
|
soq->clauses = clauses;
|
1199
1250
|
soq->c_cnt = c_cnt;
|
@@ -1202,14 +1253,16 @@ Query *spanoq_create(Query **clauses, int c_cnt)
|
|
1202
1253
|
sq->get_spans = &spanoq_get_spans;
|
1203
1254
|
sq->get_terms = &spanoq_get_terms;
|
1204
1255
|
sq->field = ((SpanQuery *)clauses[0]->data)->field;
|
1256
|
+
self->data = sq;
|
1205
1257
|
|
1206
1258
|
self->type = SPAN_OR_QUERY;
|
1207
|
-
self->data = sq;
|
1208
|
-
self->create_weight = &spanw_create;
|
1209
|
-
self->extract_terms = &spanoq_extract_terms;
|
1210
1259
|
self->rewrite = &spanoq_rewrite;
|
1260
|
+
self->extract_terms = &spanoq_extract_terms;
|
1211
1261
|
self->to_s = &spanoq_to_s;
|
1212
|
-
self->
|
1262
|
+
self->hash = &spanoq_hash;
|
1263
|
+
self->eq = &spanoq_eq;
|
1264
|
+
self->destroy_i = &spanoq_destroy;
|
1265
|
+
self->create_weight_i = &spanw_create;
|
1213
1266
|
return self;
|
1214
1267
|
}
|
1215
1268
|
|
@@ -1240,7 +1293,7 @@ char *spannq_to_s(Query *self, char *field)
|
|
1240
1293
|
return res;
|
1241
1294
|
}
|
1242
1295
|
|
1243
|
-
void spannq_extract_terms(Query *self,
|
1296
|
+
void spannq_extract_terms(Query *self, HashSet *terms)
|
1244
1297
|
{
|
1245
1298
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1246
1299
|
Query *clause;
|
@@ -1251,10 +1304,10 @@ void spannq_extract_terms(Query *self, Array *terms)
|
|
1251
1304
|
}
|
1252
1305
|
}
|
1253
1306
|
|
1254
|
-
|
1307
|
+
HashSet *spannq_get_terms(Query *self)
|
1255
1308
|
{
|
1256
1309
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1257
|
-
|
1310
|
+
HashSet *terms = term_set_create();
|
1258
1311
|
Query *clause;
|
1259
1312
|
int i;
|
1260
1313
|
for (i = 0; i < snq->c_cnt; i++) {
|
@@ -1281,34 +1334,21 @@ SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
|
|
1281
1334
|
Query *spannq_rewrite(Query *self, IndexReader *ir)
|
1282
1335
|
{
|
1283
1336
|
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1284
|
-
Query *clone = NULL;
|
1285
|
-
|
1286
1337
|
Query *clause, *rewritten;
|
1287
|
-
Query **new_clauses = ALLOC_N(Query *, snq->c_cnt);
|
1288
1338
|
int i;
|
1289
1339
|
for (i = 0; i < snq->c_cnt; i++) {
|
1290
1340
|
clause = snq->clauses[i];
|
1291
1341
|
rewritten = clause->rewrite(clause, ir);
|
1292
|
-
if ((clause
|
1293
|
-
|
1294
|
-
/* The sub-clauses will be handled by the original query */
|
1295
|
-
clone->destroy_all = false;
|
1296
|
-
}
|
1297
|
-
new_clauses[i] = rewritten;
|
1342
|
+
if ((rewritten == clause) || self->destroy_all) q_deref(clause);
|
1343
|
+
snq->clauses[i] = rewritten;
|
1298
1344
|
}
|
1299
1345
|
|
1300
|
-
|
1301
|
-
|
1302
|
-
return self->rewritten = clone; /* some clauses rewrote */
|
1303
|
-
} else {
|
1304
|
-
free(new_clauses); /* no clauses rewrote */
|
1305
|
-
return self;
|
1306
|
-
}
|
1346
|
+
self->ref_cnt++;
|
1347
|
+
return self;
|
1307
1348
|
}
|
1308
1349
|
|
1309
|
-
void spannq_destroy(
|
1350
|
+
void spannq_destroy(Query *self)
|
1310
1351
|
{
|
1311
|
-
Query *self = (Query *)p;
|
1312
1352
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1313
1353
|
SpanNearQuery *snq = (SpanNearQuery *)sq->data;
|
1314
1354
|
|
@@ -1317,21 +1357,57 @@ void spannq_destroy(void *p)
|
|
1317
1357
|
int i;
|
1318
1358
|
for (i = 0; i < snq->c_cnt; i++) {
|
1319
1359
|
clause = snq->clauses[i];
|
1320
|
-
|
1360
|
+
q_deref(clause);
|
1321
1361
|
}
|
1322
1362
|
free(snq->clauses);
|
1323
1363
|
}
|
1324
1364
|
|
1325
|
-
|
1326
1365
|
free(snq);
|
1327
1366
|
free(sq);
|
1328
|
-
|
1367
|
+
q_destroy_i(self);
|
1368
|
+
}
|
1369
|
+
|
1370
|
+
static uint spannq_hash(Query *self)
|
1371
|
+
{
|
1372
|
+
int i;
|
1373
|
+
uint hash = 0;
|
1374
|
+
Query *q;
|
1375
|
+
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1376
|
+
|
1377
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1378
|
+
q = snq->clauses[i];
|
1379
|
+
hash ^= q->hash(q);
|
1380
|
+
}
|
1381
|
+
return ((hash ^ snq->slop) << 1) | snq->in_order;
|
1382
|
+
}
|
1383
|
+
|
1384
|
+
static int spannq_eq(Query *self, Query *o)
|
1385
|
+
{
|
1386
|
+
int i;
|
1387
|
+
Query *q1, *q2;
|
1388
|
+
SpanNearQuery *snq1 = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1389
|
+
SpanNearQuery *snq2 = (SpanNearQuery *)((SpanQuery *)o->data)->data;
|
1390
|
+
if (snq1->c_cnt != snq2->c_cnt ||
|
1391
|
+
snq1->slop != snq2->slop ||
|
1392
|
+
snq1->in_order != snq2->in_order) {
|
1393
|
+
return false;
|
1394
|
+
}
|
1395
|
+
|
1396
|
+
for (i = 0; i < snq1->c_cnt; i++) {
|
1397
|
+
q1 = snq1->clauses[i];
|
1398
|
+
q2 = snq2->clauses[i];
|
1399
|
+
if (!q1->eq(q1, q2)) return false;
|
1400
|
+
}
|
1401
|
+
|
1402
|
+
return true;
|
1329
1403
|
}
|
1330
1404
|
|
1331
1405
|
Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
|
1332
1406
|
{
|
1333
1407
|
Query *self = q_create();
|
1408
|
+
|
1334
1409
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1410
|
+
|
1335
1411
|
SpanNearQuery *snq = ALLOC(SpanNearQuery);
|
1336
1412
|
snq->clauses = clauses;
|
1337
1413
|
snq->c_cnt = c_cnt;
|
@@ -1341,16 +1417,17 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
|
|
1341
1417
|
|
1342
1418
|
sq->get_spans = &spannq_get_spans;
|
1343
1419
|
sq->get_terms = &spannq_get_terms;
|
1344
|
-
|
1345
1420
|
sq->field = ((SpanQuery *)clauses[0]->data)->field;
|
1421
|
+
self->data = sq;
|
1346
1422
|
|
1347
1423
|
self->type = SPAN_NEAR_QUERY;
|
1348
|
-
self->data = sq;
|
1349
|
-
self->create_weight = &spanw_create;
|
1350
|
-
self->extract_terms = &spannq_extract_terms;
|
1351
1424
|
self->rewrite = &spannq_rewrite;
|
1425
|
+
self->extract_terms = &spannq_extract_terms;
|
1352
1426
|
self->to_s = &spannq_to_s;
|
1353
|
-
self->
|
1427
|
+
self->hash = &spannq_hash;
|
1428
|
+
self->eq = &spannq_eq;
|
1429
|
+
self->destroy_i = &spannq_destroy;
|
1430
|
+
self->create_weight_i = &spanw_create;
|
1354
1431
|
return self;
|
1355
1432
|
}
|
1356
1433
|
|
@@ -1372,16 +1449,16 @@ char *spanxq_to_s(Query *self, char *field)
|
|
1372
1449
|
return res;
|
1373
1450
|
}
|
1374
1451
|
|
1375
|
-
void spanxq_extract_terms(Query *self,
|
1452
|
+
void spanxq_extract_terms(Query *self, HashSet *terms)
|
1376
1453
|
{
|
1377
1454
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1378
1455
|
sxq->inc->extract_terms(sxq->inc, terms);
|
1379
1456
|
}
|
1380
1457
|
|
1381
|
-
|
1458
|
+
HashSet *spanxq_get_terms(Query *self)
|
1382
1459
|
{
|
1383
1460
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1384
|
-
|
1461
|
+
HashSet *terms = term_set_create();
|
1385
1462
|
sxq->inc->extract_terms(sxq->inc, terms);
|
1386
1463
|
return terms;
|
1387
1464
|
}
|
@@ -1389,48 +1466,60 @@ Array *spanxq_get_terms(Query *self)
|
|
1389
1466
|
Query *spanxq_rewrite(Query *self, IndexReader *ir)
|
1390
1467
|
{
|
1391
1468
|
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1392
|
-
Query *
|
1393
|
-
Query *inc, *exc;
|
1394
|
-
Query *inc_rewritten, *exc_rewritten;
|
1395
|
-
|
1396
|
-
inc = sxq->inc;
|
1397
|
-
inc_rewritten = inc->rewrite(inc, ir);
|
1398
|
-
exc = sxq->exc;
|
1399
|
-
exc_rewritten = exc->rewrite(exc, ir);
|
1400
|
-
if ((inc_rewritten != inc) || (exc_rewritten != exc)) {
|
1401
|
-
clone = spanxq_create(inc_rewritten, exc_rewritten);
|
1402
|
-
/* The sub-clauses will be handled by the original query */
|
1403
|
-
clone->destroy_all = false;
|
1404
|
-
}
|
1469
|
+
Query *q, *rq;
|
1405
1470
|
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1471
|
+
/* rewrite inclusive query */
|
1472
|
+
q = sxq->inc;
|
1473
|
+
rq = q->rewrite(q, ir);
|
1474
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1475
|
+
sxq->inc = rq;
|
1476
|
+
|
1477
|
+
/* rewrite exclusive query */
|
1478
|
+
q = sxq->exc;
|
1479
|
+
rq = q->rewrite(q, ir);
|
1480
|
+
if (rq == q || self->destroy_all) q_deref(q);
|
1481
|
+
sxq->exc = rq;
|
1482
|
+
|
1483
|
+
self->ref_cnt++;
|
1484
|
+
return self;
|
1412
1485
|
}
|
1413
1486
|
|
1414
|
-
void spanxq_destroy(
|
1487
|
+
void spanxq_destroy(Query *self)
|
1415
1488
|
{
|
1416
|
-
Query *self = (Query *)p;
|
1417
1489
|
SpanQuery *sq = (SpanQuery *)self->data;
|
1418
1490
|
SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
|
1419
1491
|
|
1420
1492
|
if (self->destroy_all) {
|
1421
|
-
|
1422
|
-
|
1493
|
+
q_deref(sxq->inc);
|
1494
|
+
q_deref(sxq->exc);
|
1423
1495
|
}
|
1424
1496
|
|
1425
1497
|
free(sxq);
|
1426
1498
|
free(sq);
|
1427
|
-
|
1499
|
+
q_destroy_i(self);
|
1428
1500
|
}
|
1429
1501
|
|
1502
|
+
static uint spanxq_hash(Query *self)
|
1503
|
+
{
|
1504
|
+
SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1505
|
+
return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
|
1506
|
+
}
|
1507
|
+
|
1508
|
+
static int spanxq_eq(Query *self, Query *o)
|
1509
|
+
{
|
1510
|
+
SpanNotQuery *sxq1 = (SpanNotQuery *)((SpanQuery *)self->data)->data;
|
1511
|
+
SpanNotQuery *sxq2 = (SpanNotQuery *)((SpanQuery *)o->data)->data;
|
1512
|
+
return sxq1->inc->eq(sxq1->inc, sxq2->inc) &&
|
1513
|
+
sxq1->exc->eq(sxq1->exc, sxq2->exc);
|
1514
|
+
}
|
1515
|
+
|
1516
|
+
|
1430
1517
|
Query *spanxq_create(Query *inc, Query *exc)
|
1431
1518
|
{
|
1432
1519
|
Query *self = q_create();
|
1520
|
+
|
1433
1521
|
SpanQuery *sq = ALLOC(SpanQuery);
|
1522
|
+
|
1434
1523
|
SpanNotQuery *sxq = ALLOC(SpanNotQuery);
|
1435
1524
|
sxq->inc = inc;
|
1436
1525
|
sxq->exc = exc;
|
@@ -1439,14 +1528,17 @@ Query *spanxq_create(Query *inc, Query *exc)
|
|
1439
1528
|
sq->get_spans = &spanxe_create;
|
1440
1529
|
sq->get_terms = &spanxq_get_terms;
|
1441
1530
|
sq->field = ((SpanQuery *)inc->data)->field;
|
1531
|
+
self->data = sq;
|
1442
1532
|
|
1443
1533
|
self->type = SPAN_NOT_QUERY;
|
1444
|
-
self->data = sq;
|
1445
|
-
self->create_weight = &spanw_create;
|
1446
|
-
self->extract_terms = &spanxq_extract_terms;
|
1447
1534
|
self->rewrite = &spanxq_rewrite;
|
1535
|
+
self->extract_terms = &spanxq_extract_terms;
|
1448
1536
|
self->to_s = &spanxq_to_s;
|
1449
|
-
self->
|
1537
|
+
self->hash = &spanxq_hash;
|
1538
|
+
self->eq = &spanxq_eq;
|
1539
|
+
self->destroy_i = &spanxq_destroy;
|
1540
|
+
self->create_weight_i = &spanw_create;
|
1541
|
+
|
1450
1542
|
return self;
|
1451
1543
|
}
|
1452
1544
|
|
@@ -1512,24 +1604,23 @@ bool spansc_skip_to(Scorer *self, int target)
|
|
1512
1604
|
|
1513
1605
|
Explanation *spansc_explain(Scorer *self, int target)
|
1514
1606
|
{
|
1607
|
+
Explanation *tf_explanation;
|
1515
1608
|
SpanScorer *spansc = (SpanScorer *)self->data;
|
1516
1609
|
float phrase_freq;
|
1517
1610
|
self->skip_to(self, target);
|
1518
|
-
phrase_freq = (self->doc == target) ? spansc->freq : 0.0;
|
1611
|
+
phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
|
1519
1612
|
|
1520
|
-
|
1613
|
+
tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
|
1521
1614
|
strfmt("tf(phrase_freq(%f)", phrase_freq));
|
1522
1615
|
|
1523
1616
|
return tf_explanation;
|
1524
1617
|
}
|
1525
1618
|
|
1526
|
-
void spansc_destroy(
|
1619
|
+
void spansc_destroy(Scorer *self)
|
1527
1620
|
{
|
1528
|
-
Scorer *self = (Scorer *)p;
|
1529
1621
|
SpanScorer *spansc = (SpanScorer *)self->data;
|
1530
1622
|
if (spansc->spans) spansc->spans->destroy(spansc->spans);
|
1531
|
-
|
1532
|
-
scorer_destroy(p);
|
1623
|
+
scorer_destroy_i(self);
|
1533
1624
|
}
|
1534
1625
|
|
1535
1626
|
Scorer *spansc_create(Weight *weight, IndexReader *ir)
|