isomorfeus-ferret 0.12.7 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +54 -1
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -10,21 +10,18 @@
|
|
10
10
|
#define FQSc(scorer) ((FilteredQueryScorer *)(scorer))
|
11
11
|
#define FQQ(query) ((FrtFilteredQuery *)(query))
|
12
12
|
|
13
|
-
typedef struct FilteredQueryScorer
|
14
|
-
|
15
|
-
FrtScorer
|
16
|
-
|
17
|
-
FrtBitVector *bv;
|
13
|
+
typedef struct FilteredQueryScorer {
|
14
|
+
FrtScorer super;
|
15
|
+
FrtScorer *sub_scorer;
|
16
|
+
FrtBitVector *bv;
|
18
17
|
} FilteredQueryScorer;
|
19
18
|
|
20
|
-
static float fqsc_score(FrtScorer *self)
|
21
|
-
{
|
19
|
+
static float fqsc_score(FrtScorer *self) {
|
22
20
|
FrtScorer *sub_sc = FQSc(self)->sub_scorer;
|
23
21
|
return sub_sc->score(sub_sc);
|
24
22
|
}
|
25
23
|
|
26
|
-
static bool fqsc_next(FrtScorer *self)
|
27
|
-
{
|
24
|
+
static bool fqsc_next(FrtScorer *self) {
|
28
25
|
FrtScorer *sub_sc = FQSc(self)->sub_scorer;
|
29
26
|
FrtBitVector *bv = FQSc(self)->bv;
|
30
27
|
while (sub_sc->next(sub_sc)) {
|
@@ -34,8 +31,7 @@ static bool fqsc_next(FrtScorer *self)
|
|
34
31
|
return false;
|
35
32
|
}
|
36
33
|
|
37
|
-
static bool fqsc_skip_to(FrtScorer *self, int doc_num)
|
38
|
-
{
|
34
|
+
static bool fqsc_skip_to(FrtScorer *self, int doc_num) {
|
39
35
|
FrtScorer *sub_sc = FQSc(self)->sub_scorer;
|
40
36
|
FrtBitVector *bv = FQSc(self)->bv;
|
41
37
|
if (sub_sc->skip_to(sub_sc, doc_num)) {
|
@@ -49,26 +45,21 @@ static bool fqsc_skip_to(FrtScorer *self, int doc_num)
|
|
49
45
|
return false;
|
50
46
|
}
|
51
47
|
|
52
|
-
static FrtExplanation *fqsc_explain(FrtScorer *self, int doc_num)
|
53
|
-
{
|
48
|
+
static FrtExplanation *fqsc_explain(FrtScorer *self, int doc_num) {
|
54
49
|
FrtScorer *sub_sc = FQSc(self)->sub_scorer;
|
55
50
|
return sub_sc->explain(sub_sc, doc_num);
|
56
51
|
}
|
57
52
|
|
58
|
-
static void fqsc_destroy(FrtScorer *self)
|
59
|
-
{
|
53
|
+
static void fqsc_destroy(FrtScorer *self) {
|
60
54
|
FilteredQueryScorer *fqsc = FQSc(self);
|
61
55
|
fqsc->sub_scorer->destroy(fqsc->sub_scorer);
|
62
56
|
frt_scorer_destroy_i(self);
|
63
57
|
}
|
64
58
|
|
65
|
-
static FrtScorer *fqsc_new(FrtScorer *scorer, FrtBitVector *bv, FrtSimilarity *sim)
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
FQSc(self)->sub_scorer = scorer;
|
70
|
-
FQSc(self)->bv = bv;
|
71
|
-
|
59
|
+
static FrtScorer *fqsc_new(FrtScorer *scorer, FrtBitVector *bv, FrtSimilarity *sim) {
|
60
|
+
FrtScorer *self = frt_scorer_new(FilteredQueryScorer, sim);
|
61
|
+
FQSc(self)->sub_scorer = scorer;
|
62
|
+
FQSc(self)->bv = bv;
|
72
63
|
self->score = &fqsc_score;
|
73
64
|
self->next = &fqsc_next;
|
74
65
|
self->skip_to = &fqsc_skip_to;
|
@@ -85,43 +76,37 @@ static FrtScorer *fqsc_new(FrtScorer *scorer, FrtBitVector *bv, FrtSimilarity *s
|
|
85
76
|
***************************************************************************/
|
86
77
|
|
87
78
|
#define FQW(weight) ((FilteredQueryWeight *)(weight))
|
88
|
-
|
89
|
-
{
|
79
|
+
|
80
|
+
typedef struct FilteredQueryWeight {
|
90
81
|
FrtWeight super;
|
91
82
|
FrtWeight *sub_weight;
|
92
83
|
} FilteredQueryWeight;
|
93
84
|
|
94
|
-
static char *fqw_to_s(FrtWeight *self)
|
95
|
-
{
|
85
|
+
static char *fqw_to_s(FrtWeight *self) {
|
96
86
|
return frt_strfmt("FilteredQueryWeight(%f)", self->value);
|
97
87
|
}
|
98
88
|
|
99
|
-
static float fqw_sum_of_squared_weights(FrtWeight *self)
|
100
|
-
{
|
89
|
+
static float fqw_sum_of_squared_weights(FrtWeight *self) {
|
101
90
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
102
91
|
return sub_weight->sum_of_squared_weights(sub_weight);
|
103
92
|
}
|
104
93
|
|
105
|
-
static void fqw_normalize(FrtWeight *self, float normalization_factor)
|
106
|
-
{
|
94
|
+
static void fqw_normalize(FrtWeight *self, float normalization_factor) {
|
107
95
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
108
96
|
sub_weight->normalize(sub_weight, normalization_factor);
|
109
97
|
}
|
110
98
|
|
111
|
-
static float fqw_get_value(FrtWeight *self)
|
112
|
-
{
|
99
|
+
static float fqw_get_value(FrtWeight *self) {
|
113
100
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
114
101
|
return sub_weight->get_value(sub_weight);
|
115
102
|
}
|
116
103
|
|
117
|
-
static FrtExplanation *fqw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num)
|
118
|
-
{
|
104
|
+
static FrtExplanation *fqw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_num) {
|
119
105
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
120
106
|
return sub_weight->explain(sub_weight, ir, doc_num);
|
121
107
|
}
|
122
108
|
|
123
|
-
static FrtScorer *fqw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
124
|
-
{
|
109
|
+
static FrtScorer *fqw_scorer(FrtWeight *self, FrtIndexReader *ir) {
|
125
110
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
126
111
|
FrtScorer *scorer = sub_weight->scorer(sub_weight, ir);
|
127
112
|
FrtFilter *filter = FQQ(self->query)->filter;
|
@@ -129,30 +114,25 @@ static FrtScorer *fqw_scorer(FrtWeight *self, FrtIndexReader *ir)
|
|
129
114
|
return fqsc_new(scorer, frt_filt_get_bv(filter, ir), self->similarity);
|
130
115
|
}
|
131
116
|
|
132
|
-
static void fqw_destroy(FrtWeight *self)
|
133
|
-
{
|
117
|
+
static void fqw_destroy(FrtWeight *self) {
|
134
118
|
FrtWeight *sub_weight = FQW(self)->sub_weight;
|
135
119
|
sub_weight->destroy(sub_weight);
|
136
120
|
frt_w_destroy(self);
|
137
121
|
}
|
138
122
|
|
139
|
-
static FrtWeight *fqw_new(FrtQuery *query, FrtWeight *sub_weight, FrtSimilarity *sim)
|
140
|
-
{
|
123
|
+
static FrtWeight *fqw_new(FrtQuery *query, FrtWeight *sub_weight, FrtSimilarity *sim) {
|
141
124
|
FrtWeight *self = w_new(FilteredQueryWeight, query);
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
self->
|
146
|
-
self->
|
147
|
-
self->
|
148
|
-
self->
|
149
|
-
self->
|
150
|
-
self->
|
151
|
-
self->
|
152
|
-
|
153
|
-
self->similarity = sim;
|
154
|
-
self->idf = 1.0f;
|
155
|
-
self->value = sub_weight->value;
|
125
|
+
FQW(self)->sub_weight = sub_weight;
|
126
|
+
self->get_value = &fqw_get_value;
|
127
|
+
self->normalize = &fqw_normalize;
|
128
|
+
self->scorer = &fqw_scorer;
|
129
|
+
self->explain = &fqw_explain;
|
130
|
+
self->to_s = &fqw_to_s;
|
131
|
+
self->destroy = &fqw_destroy;
|
132
|
+
self->sum_of_squared_weights = &fqw_sum_of_squared_weights;
|
133
|
+
self->similarity = sim;
|
134
|
+
self->idf = 1.0f;
|
135
|
+
self->value = sub_weight->value;
|
156
136
|
|
157
137
|
return self;
|
158
138
|
}
|
@@ -163,8 +143,7 @@ static FrtWeight *fqw_new(FrtQuery *query, FrtWeight *sub_weight, FrtSimilarity
|
|
163
143
|
*
|
164
144
|
***************************************************************************/
|
165
145
|
|
166
|
-
static char *fq_to_s(FrtQuery *self,
|
167
|
-
{
|
146
|
+
static char *fq_to_s(FrtQuery *self, ID default_field) {
|
168
147
|
FrtFilteredQuery *fq = FQQ(self);
|
169
148
|
char *filter_str = fq->filter->to_s(fq->filter);
|
170
149
|
char *query_str = fq->query->to_s(fq->query, default_field);
|
@@ -179,31 +158,33 @@ static char *fq_to_s(FrtQuery *self, FrtSymbol default_field)
|
|
179
158
|
return buffer;;
|
180
159
|
}
|
181
160
|
|
182
|
-
static void fq_destroy(FrtQuery *self)
|
183
|
-
{
|
161
|
+
static void fq_destroy(FrtQuery *self) {
|
184
162
|
frt_filt_deref(FQQ(self)->filter);
|
185
163
|
frt_q_deref(FQQ(self)->query);
|
186
164
|
frt_q_destroy_i(self);
|
187
165
|
}
|
188
166
|
|
189
|
-
static FrtWeight *fq_new_weight(FrtQuery *self, FrtSearcher *searcher)
|
190
|
-
{
|
167
|
+
static FrtWeight *fq_new_weight(FrtQuery *self, FrtSearcher *searcher) {
|
191
168
|
FrtQuery *sub_query = FQQ(self)->query;
|
192
|
-
return fqw_new(self, frt_q_weight(sub_query, searcher),
|
193
|
-
searcher->similarity);
|
169
|
+
return fqw_new(self, frt_q_weight(sub_query, searcher), searcher->similarity);
|
194
170
|
}
|
195
171
|
|
196
|
-
FrtQuery *
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
FQQ(self)->query = query;
|
201
|
-
FQQ(self)->filter = filter;
|
172
|
+
FrtQuery *frt_fq_alloc(void) {
|
173
|
+
return frt_q_new(FrtFilteredQuery);
|
174
|
+
}
|
202
175
|
|
203
|
-
|
204
|
-
self->
|
205
|
-
self->
|
206
|
-
self->
|
176
|
+
FrtQuery *frt_fq_init(FrtQuery *self, FrtQuery *query, FrtFilter *filter) {
|
177
|
+
FQQ(self)->query = query;
|
178
|
+
FQQ(self)->filter = filter;
|
179
|
+
self->type = FILTERED_QUERY;
|
180
|
+
self->to_s = &fq_to_s;
|
181
|
+
self->destroy_i = &fq_destroy;
|
182
|
+
self->create_weight_i = &fq_new_weight;
|
207
183
|
|
208
184
|
return self;
|
209
185
|
}
|
186
|
+
|
187
|
+
FrtQuery *frt_fq_new(FrtQuery *query, FrtFilter *filter) {
|
188
|
+
FrtQuery *self = frt_fq_alloc();
|
189
|
+
return frt_fq_init(self, query, filter);
|
190
|
+
}
|
@@ -2,6 +2,8 @@
|
|
2
2
|
#include "frt_search.h"
|
3
3
|
#include "frt_helper.h"
|
4
4
|
|
5
|
+
#undef close
|
6
|
+
|
5
7
|
/****************************************************************************
|
6
8
|
*
|
7
9
|
* FuzzyStuff
|
@@ -24,8 +26,7 @@
|
|
24
26
|
* of the shorter string out of the query string and the index term being
|
25
27
|
* compared.
|
26
28
|
*/
|
27
|
-
static int fuzq_calculate_max_distance(FrtFuzzyQuery *fuzq, int m)
|
28
|
-
{
|
29
|
+
static int fuzq_calculate_max_distance(FrtFuzzyQuery *fuzq, int m) {
|
29
30
|
return (int)((1.0 - fuzq->min_sim) * (FRT_MIN(fuzq->text_len, m) + fuzq->pre_len));
|
30
31
|
}
|
31
32
|
|
@@ -35,8 +36,7 @@ static int fuzq_calculate_max_distance(FrtFuzzyQuery *fuzq, int m)
|
|
35
36
|
* lengths up to the FRT_TYPICAL_LONGEST_WORD limit. For words longer than this we
|
36
37
|
* calculate the value live.
|
37
38
|
*/
|
38
|
-
static void fuzq_initialize_max_distances(FrtFuzzyQuery *fuzq)
|
39
|
-
{
|
39
|
+
static void fuzq_initialize_max_distances(FrtFuzzyQuery *fuzq) {
|
40
40
|
int i;
|
41
41
|
for (i = 0; i < FRT_TYPICAL_LONGEST_WORD; i++) {
|
42
42
|
fuzq->max_distances[i] = fuzq_calculate_max_distance(fuzq, i);
|
@@ -47,8 +47,7 @@ static void fuzq_initialize_max_distances(FrtFuzzyQuery *fuzq)
|
|
47
47
|
* Return the cached max-distance value if the word is within the
|
48
48
|
* FRT_TYPICAL_LONGEST_WORD limit.
|
49
49
|
*/
|
50
|
-
static int fuzq_get_max_distance(FrtFuzzyQuery *fuzq, int m)
|
51
|
-
{
|
50
|
+
static int fuzq_get_max_distance(FrtFuzzyQuery *fuzq, int m) {
|
52
51
|
if (m < FRT_TYPICAL_LONGEST_WORD)
|
53
52
|
return fuzq->max_distances[m];
|
54
53
|
return fuzq_calculate_max_distance(fuzq, m);
|
@@ -62,10 +61,7 @@ static int fuzq_get_max_distance(FrtFuzzyQuery *fuzq, int m)
|
|
62
61
|
* @params m the string length of +target+
|
63
62
|
* @params n the string length of the query string minus length of the prefix
|
64
63
|
*/
|
65
|
-
static float fuzq_score_mn(FrtFuzzyQuery *fuzq,
|
66
|
-
const char *target,
|
67
|
-
const int m, const int n)
|
68
|
-
{
|
64
|
+
static float fuzq_score_mn(FrtFuzzyQuery *fuzq, const char *target, const int m, const int n) {
|
69
65
|
int i, j, prune;
|
70
66
|
int *d_curr, *d_prev;
|
71
67
|
const char *text = fuzq->text;
|
@@ -124,8 +120,7 @@ static float fuzq_score_mn(FrtFuzzyQuery *fuzq,
|
|
124
120
|
*
|
125
121
|
* http://mail-archives.apache.org/mod_mbox/lucene-java-dev/200606.mbox/%3c448F0E8C.3050901@alias-i.com%3e
|
126
122
|
*/
|
127
|
-
float frt_fuzq_score(FrtFuzzyQuery *fuzq, const char *target)
|
128
|
-
{
|
123
|
+
float frt_fuzq_score(FrtFuzzyQuery *fuzq, const char *target) {
|
129
124
|
const int m = (int)strlen(target);
|
130
125
|
const int n = fuzq->text_len;
|
131
126
|
|
@@ -148,11 +143,10 @@ float frt_fuzq_score(FrtFuzzyQuery *fuzq, const char *target)
|
|
148
143
|
|
149
144
|
#define FzQ(query) ((FrtFuzzyQuery *)(query))
|
150
145
|
|
151
|
-
static char *fuzq_to_s(FrtQuery *self,
|
152
|
-
{
|
146
|
+
static char *fuzq_to_s(FrtQuery *self, ID curr_field) {
|
153
147
|
char *buffer, *bptr;
|
154
148
|
char *term = FzQ(self)->term;
|
155
|
-
|
149
|
+
ID field = FzQ(self)->field;
|
156
150
|
const char *field_name = rb_id2name(field);
|
157
151
|
bptr = buffer = FRT_ALLOC_N(char, strlen(term) + strlen(field_name) + 70);
|
158
152
|
|
@@ -174,8 +168,7 @@ static char *fuzq_to_s(FrtQuery *self, FrtSymbol curr_field)
|
|
174
168
|
return buffer;
|
175
169
|
}
|
176
170
|
|
177
|
-
static FrtQuery *fuzq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
178
|
-
{
|
171
|
+
static FrtQuery *fuzq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
179
172
|
FrtQuery *q;
|
180
173
|
FrtFuzzyQuery *fuzq = FzQ(self);
|
181
174
|
|
@@ -198,8 +191,7 @@ static FrtQuery *fuzq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
198
191
|
strncpy(prefix, term, pre_len);
|
199
192
|
prefix[pre_len] = '\0';
|
200
193
|
te = ir->terms_from(ir, field_num, prefix);
|
201
|
-
}
|
202
|
-
else {
|
194
|
+
} else {
|
203
195
|
te = ir->terms(ir, field_num);
|
204
196
|
}
|
205
197
|
|
@@ -228,21 +220,18 @@ static FrtQuery *fuzq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
228
220
|
return q;
|
229
221
|
}
|
230
222
|
|
231
|
-
static void fuzq_destroy(FrtQuery *self)
|
232
|
-
{
|
223
|
+
static void fuzq_destroy(FrtQuery *self) {
|
233
224
|
free(FzQ(self)->term);
|
234
225
|
free(FzQ(self)->da);
|
235
226
|
frt_q_destroy_i(self);
|
236
227
|
}
|
237
228
|
|
238
|
-
static unsigned long long fuzq_hash(FrtQuery *self)
|
239
|
-
{
|
229
|
+
static unsigned long long fuzq_hash(FrtQuery *self) {
|
240
230
|
return frt_str_hash(FzQ(self)->term) ^ frt_str_hash(rb_id2name(FzQ(self)->field))
|
241
231
|
^ frt_float2int(FzQ(self)->min_sim) ^ FzQ(self)->pre_len;
|
242
232
|
}
|
243
233
|
|
244
|
-
static int fuzq_eq(FrtQuery *self, FrtQuery *o)
|
245
|
-
{
|
234
|
+
static int fuzq_eq(FrtQuery *self, FrtQuery *o) {
|
246
235
|
FrtFuzzyQuery *fq1 = FzQ(self);
|
247
236
|
FrtFuzzyQuery *fq2 = FzQ(o);
|
248
237
|
|
@@ -252,17 +241,17 @@ static int fuzq_eq(FrtQuery *self, FrtQuery *o)
|
|
252
241
|
&& (fq1->min_sim == fq2->min_sim);
|
253
242
|
}
|
254
243
|
|
255
|
-
FrtQuery *
|
256
|
-
|
257
|
-
|
258
|
-
FrtQuery *self = frt_q_new(FrtFuzzyQuery);
|
244
|
+
FrtQuery *frt_fuzq_alloc(void) {
|
245
|
+
return frt_q_new(FrtFuzzyQuery);
|
246
|
+
}
|
259
247
|
|
248
|
+
FrtQuery *frt_fuzq_init_conf(FrtQuery *self, ID field, const char *term, float min_sim, int pre_len, int max_terms) {
|
260
249
|
FzQ(self)->field = field;
|
261
250
|
FzQ(self)->term = frt_estrdup(term);
|
262
251
|
FzQ(self)->pre_len = pre_len ? pre_len : FRT_DEF_PRE_LEN;
|
263
252
|
FzQ(self)->min_sim = min_sim ? min_sim : FRT_DEF_MIN_SIM;
|
264
253
|
FzQ(self)->da = NULL;
|
265
|
-
FrtMTQMaxTerms(self)
|
254
|
+
FrtMTQMaxTerms(self) = max_terms ? max_terms : FRT_DEF_MAX_TERMS;
|
266
255
|
|
267
256
|
self->type = FUZZY_QUERY;
|
268
257
|
self->to_s = &fuzq_to_s;
|
@@ -275,7 +264,11 @@ FrtQuery *frt_fuzq_new_conf(FrtSymbol field, const char *term,
|
|
275
264
|
return self;
|
276
265
|
}
|
277
266
|
|
278
|
-
FrtQuery *
|
279
|
-
|
267
|
+
FrtQuery *frt_fuzq_new_conf(ID field, const char *term, float min_sim, int pre_len, int max_terms) {
|
268
|
+
FrtQuery *self = frt_fuzq_alloc();
|
269
|
+
return frt_fuzq_init_conf(self, field, term, min_sim, pre_len, max_terms);
|
270
|
+
}
|
271
|
+
|
272
|
+
FrtQuery *frt_fuzq_new(ID field, const char *term) {
|
280
273
|
return frt_fuzq_new_conf(field, term, 0.0f, 0, 0);
|
281
274
|
}
|
@@ -9,10 +9,9 @@
|
|
9
9
|
|
10
10
|
#define MASc(scorer) ((MatchAllScorer *)(scorer))
|
11
11
|
|
12
|
-
typedef struct MatchAllScorer
|
13
|
-
|
14
|
-
|
15
|
-
FrtIndexReader *ir;
|
12
|
+
typedef struct MatchAllScorer {
|
13
|
+
FrtScorer super;
|
14
|
+
FrtIndexReader *ir;
|
16
15
|
int max_doc;
|
17
16
|
float score;
|
18
17
|
} MatchAllScorer;
|
@@ -46,14 +45,11 @@ static FrtExplanation *masc_explain(FrtScorer *self, int doc_num)
|
|
46
45
|
return frt_expl_new(1.0, "MatchAllScorer");
|
47
46
|
}
|
48
47
|
|
49
|
-
static FrtScorer *masc_new(FrtWeight *weight, FrtIndexReader *ir)
|
50
|
-
{
|
48
|
+
static FrtScorer *masc_new(FrtWeight *weight, FrtIndexReader *ir) {
|
51
49
|
FrtScorer *self = frt_scorer_new(MatchAllScorer, weight->similarity);
|
52
|
-
|
53
50
|
MASc(self)->ir = ir;
|
54
51
|
MASc(self)->max_doc = ir->max_doc(ir);
|
55
52
|
MASc(self)->score = weight->value;
|
56
|
-
|
57
53
|
self->doc = -1;
|
58
54
|
self->score = &masc_score;
|
59
55
|
self->next = &masc_next;
|
@@ -89,16 +85,13 @@ static FrtExplanation *maw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_
|
|
89
85
|
return expl;
|
90
86
|
}
|
91
87
|
|
92
|
-
static FrtWeight *maw_new(FrtQuery *query, FrtSearcher *searcher)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
self->
|
97
|
-
self->
|
98
|
-
self->
|
99
|
-
|
100
|
-
self->similarity = query->get_similarity(query, searcher);
|
101
|
-
self->idf = 1.0f;
|
88
|
+
static FrtWeight *maw_new(FrtQuery *query, FrtSearcher *searcher) {
|
89
|
+
FrtWeight *self = w_new(FrtWeight, query);
|
90
|
+
self->scorer = &masc_new;
|
91
|
+
self->explain = &maw_explain;
|
92
|
+
self->to_s = &maw_to_s;
|
93
|
+
self->similarity = query->get_similarity(query, searcher);
|
94
|
+
self->idf = 1.0f;
|
102
95
|
|
103
96
|
return self;
|
104
97
|
}
|
@@ -109,8 +102,7 @@ static FrtWeight *maw_new(FrtQuery *query, FrtSearcher *searcher)
|
|
109
102
|
*
|
110
103
|
***************************************************************************/
|
111
104
|
|
112
|
-
static char *maq_to_s(FrtQuery *self,
|
113
|
-
{
|
105
|
+
static char *maq_to_s(FrtQuery *self, ID default_field) {
|
114
106
|
(void)default_field;
|
115
107
|
if (self->boost == 1.0) {
|
116
108
|
return frt_estrdup("*");
|
@@ -131,10 +123,11 @@ static int maq_eq(FrtQuery *self, FrtQuery *o)
|
|
131
123
|
return true;
|
132
124
|
}
|
133
125
|
|
134
|
-
FrtQuery *
|
135
|
-
|
136
|
-
|
126
|
+
FrtQuery *frt_maq_alloc(void) {
|
127
|
+
return frt_q_new(FrtQuery);
|
128
|
+
}
|
137
129
|
|
130
|
+
FrtQuery *frt_maq_init(FrtQuery *self) {
|
138
131
|
self->type = MATCH_ALL_QUERY;
|
139
132
|
self->to_s = &maq_to_s;
|
140
133
|
self->hash = &maq_hash;
|
@@ -145,3 +138,8 @@ FrtQuery *frt_maq_new()
|
|
145
138
|
return self;
|
146
139
|
}
|
147
140
|
|
141
|
+
FrtQuery *frt_maq_new(void) {
|
142
|
+
FrtQuery *self = frt_maq_alloc();
|
143
|
+
return frt_maq_init(self);
|
144
|
+
}
|
145
|
+
|