ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/q_prefix.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
|
+
#include "symbol.h"
|
4
|
+
#include "internal.h"
|
3
5
|
|
4
6
|
/****************************************************************************
|
5
7
|
*
|
@@ -9,23 +11,20 @@
|
|
9
11
|
|
10
12
|
#define PfxQ(query) ((PrefixQuery *)(query))
|
11
13
|
|
12
|
-
static char *prq_to_s(Query *self,
|
14
|
+
static char *prq_to_s(Query *self, Symbol default_field)
|
13
15
|
{
|
14
16
|
char *buffer, *bptr;
|
15
17
|
const char *prefix = PfxQ(self)->prefix;
|
16
|
-
const char *field = PfxQ(self)->field;
|
17
18
|
size_t plen = strlen(prefix);
|
18
|
-
size_t flen =
|
19
|
+
size_t flen = sym_len(PfxQ(self)->field);
|
19
20
|
|
20
21
|
bptr = buffer = ALLOC_N(char, plen + flen + 35);
|
21
22
|
|
22
|
-
if (
|
23
|
-
sprintf(bptr, "%s:", field);
|
24
|
-
bptr += flen + 1;
|
23
|
+
if (PfxQ(self)->field != default_field) {
|
24
|
+
bptr += sprintf(bptr, "%s:", S(PfxQ(self)->field));
|
25
25
|
}
|
26
26
|
|
27
|
-
sprintf(bptr, "%s*", prefix);
|
28
|
-
bptr += plen + 1;
|
27
|
+
bptr += sprintf(bptr, "%s*", prefix);
|
29
28
|
if (self->boost != 1.0) {
|
30
29
|
*bptr = '^';
|
31
30
|
dbl_to_s(++bptr, self->boost);
|
@@ -36,9 +35,9 @@ static char *prq_to_s(Query *self, const char *current_field)
|
|
36
35
|
|
37
36
|
static Query *prq_rewrite(Query *self, IndexReader *ir)
|
38
37
|
{
|
39
|
-
const
|
40
|
-
|
41
|
-
|
38
|
+
const int field_num = fis_get_field_num(ir->fis, PfxQ(self)->field);
|
39
|
+
Query *volatile q = multi_tq_new_conf(PfxQ(self)->field,
|
40
|
+
MTQMaxTerms(self), 0.0);
|
42
41
|
q->boost = self->boost; /* set the boost */
|
43
42
|
|
44
43
|
if (field_num >= 0) {
|
@@ -48,7 +47,7 @@ static Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
48
47
|
size_t prefix_len = strlen(prefix);
|
49
48
|
|
50
49
|
TRY
|
51
|
-
do {
|
50
|
+
do {
|
52
51
|
if (strncmp(term, prefix, prefix_len) != 0) {
|
53
52
|
break;
|
54
53
|
}
|
@@ -64,27 +63,26 @@ static Query *prq_rewrite(Query *self, IndexReader *ir)
|
|
64
63
|
|
65
64
|
static void prq_destroy(Query *self)
|
66
65
|
{
|
67
|
-
free(PfxQ(self)->field);
|
68
66
|
free(PfxQ(self)->prefix);
|
69
67
|
q_destroy_i(self);
|
70
68
|
}
|
71
69
|
|
72
70
|
static unsigned long prq_hash(Query *self)
|
73
71
|
{
|
74
|
-
return
|
72
|
+
return sym_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
|
75
73
|
}
|
76
74
|
|
77
75
|
static int prq_eq(Query *self, Query *o)
|
78
76
|
{
|
79
|
-
return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
|
80
|
-
&& (
|
77
|
+
return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
|
78
|
+
&& (PfxQ(self)->field == PfxQ(o)->field);
|
81
79
|
}
|
82
80
|
|
83
|
-
Query *prefixq_new(
|
81
|
+
Query *prefixq_new(Symbol field, const char *prefix)
|
84
82
|
{
|
85
83
|
Query *self = q_new(PrefixQuery);
|
86
84
|
|
87
|
-
PfxQ(self)->field =
|
85
|
+
PfxQ(self)->field = field;
|
88
86
|
PfxQ(self)->prefix = estrdup(prefix);
|
89
87
|
MTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
|
90
88
|
|
data/ext/q_range.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
|
+
#include "symbol.h"
|
4
|
+
#include "internal.h"
|
3
5
|
|
4
6
|
/*****************************************************************************
|
5
7
|
*
|
@@ -9,26 +11,27 @@
|
|
9
11
|
|
10
12
|
typedef struct Range
|
11
13
|
{
|
12
|
-
|
14
|
+
Symbol field;
|
13
15
|
char *lower_term;
|
14
16
|
char *upper_term;
|
15
17
|
bool include_lower : 1;
|
16
18
|
bool include_upper : 1;
|
17
19
|
} Range;
|
18
20
|
|
19
|
-
static char *range_to_s(Range *range,
|
21
|
+
static char *range_to_s(Range *range, Symbol default_field, float boost)
|
20
22
|
{
|
21
23
|
char *buffer, *b;
|
22
24
|
size_t flen, llen, ulen;
|
25
|
+
const char *field = S(range->field);
|
23
26
|
|
24
|
-
flen = strlen(
|
27
|
+
flen = strlen(field);
|
25
28
|
llen = range->lower_term ? strlen(range->lower_term) : 0;
|
26
29
|
ulen = range->upper_term ? strlen(range->upper_term) : 0;
|
27
30
|
buffer = ALLOC_N(char, flen + llen + ulen + 40);
|
28
31
|
b = buffer;
|
29
32
|
|
30
|
-
if (
|
31
|
-
memcpy(buffer,
|
33
|
+
if (default_field != range->field) {
|
34
|
+
memcpy(buffer, field, flen * sizeof(char));
|
32
35
|
b += flen;
|
33
36
|
*b = ':';
|
34
37
|
b++;
|
@@ -68,7 +71,6 @@ static char *range_to_s(Range *range, const char *field, float boost)
|
|
68
71
|
|
69
72
|
static void range_destroy(Range *range)
|
70
73
|
{
|
71
|
-
free(range->field);
|
72
74
|
free(range->lower_term);
|
73
75
|
free(range->upper_term);
|
74
76
|
free(range);
|
@@ -77,26 +79,26 @@ static void range_destroy(Range *range)
|
|
77
79
|
static unsigned long range_hash(Range *filt)
|
78
80
|
{
|
79
81
|
return filt->include_lower | (filt->include_upper << 1)
|
80
|
-
| ((
|
82
|
+
| ((sym_hash(filt->field)
|
81
83
|
^ (filt->lower_term ? str_hash(filt->lower_term) : 0)
|
82
84
|
^ (filt->upper_term ? str_hash(filt->upper_term) : 0)) << 2);
|
83
85
|
}
|
84
86
|
|
85
|
-
static int str_eq(char *s1, char *s2)
|
87
|
+
static int str_eq(const char *s1, const char *s2)
|
86
88
|
{
|
87
89
|
return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
|
88
90
|
}
|
89
91
|
|
90
92
|
static int range_eq(Range *filt, Range *o)
|
91
93
|
{
|
92
|
-
return (
|
94
|
+
return ((filt->field == o->field)
|
93
95
|
&& str_eq(filt->lower_term, o->lower_term)
|
94
96
|
&& str_eq(filt->upper_term, o->upper_term)
|
95
97
|
&& (filt->include_lower == o->include_lower)
|
96
98
|
&& (filt->include_upper == o->include_upper));
|
97
99
|
}
|
98
100
|
|
99
|
-
Range *range_new(
|
101
|
+
static Range *range_new(Symbol field, const char *lower_term,
|
100
102
|
const char *upper_term, bool include_lower,
|
101
103
|
bool include_upper)
|
102
104
|
{
|
@@ -123,7 +125,61 @@ Range *range_new(const char *field, const char *lower_term,
|
|
123
125
|
|
124
126
|
range = ALLOC(Range);
|
125
127
|
|
126
|
-
range->field =
|
128
|
+
range->field = field;
|
129
|
+
range->lower_term = lower_term ? estrdup(lower_term) : NULL;
|
130
|
+
range->upper_term = upper_term ? estrdup(upper_term) : NULL;
|
131
|
+
range->include_lower = include_lower;
|
132
|
+
range->include_upper = include_upper;
|
133
|
+
return range;
|
134
|
+
}
|
135
|
+
|
136
|
+
static Range *trange_new(Symbol field, const char *lower_term,
|
137
|
+
const char *upper_term, bool include_lower,
|
138
|
+
bool include_upper)
|
139
|
+
{
|
140
|
+
Range *range;
|
141
|
+
int len;
|
142
|
+
double upper_num, lower_num;
|
143
|
+
|
144
|
+
if (!lower_term && !upper_term) {
|
145
|
+
RAISE(ARG_ERROR, "Nil bounds for range. A range must include either "
|
146
|
+
"lower bound or an upper bound");
|
147
|
+
}
|
148
|
+
if (include_lower && !lower_term) {
|
149
|
+
RAISE(ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
|
150
|
+
"is, if you specify :include_lower => true when you create a "
|
151
|
+
"range you must include a :lower_term");
|
152
|
+
}
|
153
|
+
if (include_upper && !upper_term) {
|
154
|
+
RAISE(ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
|
155
|
+
"is, if you specify :include_upper => true when you create a "
|
156
|
+
"range you must include a :upper_term");
|
157
|
+
}
|
158
|
+
if (upper_term && lower_term) {
|
159
|
+
if ((!lower_term ||
|
160
|
+
(sscanf(lower_term, "%lg%n", &lower_num, &len) &&
|
161
|
+
(int)strlen(lower_term) == len)) &&
|
162
|
+
(!upper_term ||
|
163
|
+
(sscanf(upper_term, "%lg%n", &upper_num, &len) &&
|
164
|
+
(int)strlen(upper_term) == len)))
|
165
|
+
{
|
166
|
+
if (upper_num < lower_num) {
|
167
|
+
RAISE(ARG_ERROR, "Upper bound must be greater than lower bound."
|
168
|
+
" numbers \"%lg\" < \"%lg\"", upper_num, lower_num);
|
169
|
+
}
|
170
|
+
}
|
171
|
+
else {
|
172
|
+
if (upper_term && lower_term &&
|
173
|
+
(strcmp(upper_term, lower_term) < 0)) {
|
174
|
+
RAISE(ARG_ERROR, "Upper bound must be greater than lower bound."
|
175
|
+
" \"%s\" < \"%s\"", upper_term, lower_term);
|
176
|
+
}
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
range = ALLOC(Range);
|
181
|
+
|
182
|
+
range->field = field;
|
127
183
|
range->lower_term = lower_term ? estrdup(lower_term) : NULL;
|
128
184
|
range->upper_term = upper_term ? estrdup(upper_term) : NULL;
|
129
185
|
range->include_lower = include_lower;
|
@@ -153,7 +209,7 @@ static void rfilt_destroy_i(Filter *filt)
|
|
153
209
|
|
154
210
|
static char *rfilt_to_s(Filter *filt)
|
155
211
|
{
|
156
|
-
char *rstr = range_to_s(RF(filt)->range,
|
212
|
+
char *rstr = range_to_s(RF(filt)->range, NULL, 1.0);
|
157
213
|
char *rfstr = strfmt("RangeFilter< %s >", rstr);
|
158
214
|
free(rstr);
|
159
215
|
return rfstr;
|
@@ -229,7 +285,7 @@ static int rfilt_eq(Filter *filt, Filter *o)
|
|
229
285
|
return range_eq(RF(filt)->range, RF(o)->range);
|
230
286
|
}
|
231
287
|
|
232
|
-
Filter *rfilt_new(
|
288
|
+
Filter *rfilt_new(Symbol field,
|
233
289
|
const char *lower_term, const char *upper_term,
|
234
290
|
bool include_lower, bool include_upper)
|
235
291
|
{
|
@@ -245,6 +301,143 @@ Filter *rfilt_new(const char *field,
|
|
245
301
|
return filt;
|
246
302
|
}
|
247
303
|
|
304
|
+
/***************************************************************************
|
305
|
+
*
|
306
|
+
* TypedRangeFilter
|
307
|
+
*
|
308
|
+
***************************************************************************/
|
309
|
+
|
310
|
+
static char *trfilt_to_s(Filter *filt)
|
311
|
+
{
|
312
|
+
char *rstr = range_to_s(RF(filt)->range, NULL, 1.0);
|
313
|
+
char *rfstr = strfmt("TypedRangeFilter< %s >", rstr);
|
314
|
+
free(rstr);
|
315
|
+
return rfstr;
|
316
|
+
}
|
317
|
+
|
318
|
+
typedef enum {
|
319
|
+
TRC_NONE = 0x00,
|
320
|
+
TRC_LE = 0x01,
|
321
|
+
TRC_LT = 0x02,
|
322
|
+
TRC_GE = 0x04,
|
323
|
+
TRC_GE_LE = 0x05,
|
324
|
+
TRC_GE_LT = 0x06,
|
325
|
+
TRC_GT = 0x08,
|
326
|
+
TRC_GT_LE = 0x09,
|
327
|
+
TRC_GT_LT = 0x0a
|
328
|
+
} TypedRangeCheck;
|
329
|
+
|
330
|
+
#define SET_DOCS(cond)\
|
331
|
+
do {\
|
332
|
+
if (term[0] > '9') break; /* done */\
|
333
|
+
sscanf(term, "%lg%n", &num, &len);\
|
334
|
+
if (len == te->curr_term_len) { /* We have a number */\
|
335
|
+
if (cond) {\
|
336
|
+
tde->seek_te(tde, te);\
|
337
|
+
while (tde->next(tde)) {\
|
338
|
+
bv_set(bv, tde->doc_num(tde));\
|
339
|
+
}\
|
340
|
+
}\
|
341
|
+
}\
|
342
|
+
} while (te->next(te))
|
343
|
+
|
344
|
+
|
345
|
+
static BitVector *trfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
346
|
+
{
|
347
|
+
Range *range = RF(filt)->range;
|
348
|
+
double lnum = 0.0, unum = 0.0;
|
349
|
+
int len = 0;
|
350
|
+
const char *lt = range->lower_term;
|
351
|
+
const char *ut = range->upper_term;
|
352
|
+
if ((!lt || (sscanf(lt, "%lg%n", &lnum, &len) && (int)strlen(lt) == len)) &&
|
353
|
+
(!ut || (sscanf(ut, "%lg%n", &unum, &len) && (int)strlen(ut) == len)))
|
354
|
+
{
|
355
|
+
BitVector *bv = bv_new_capa(ir->max_doc(ir));
|
356
|
+
FieldInfo *fi = fis_get_field(ir->fis, range->field);
|
357
|
+
/* if the field info exists we need to add docs to the bit vector,
|
358
|
+
* otherwise we just return an empty bit vector */
|
359
|
+
if (fi) {
|
360
|
+
const int field_num = fi->number;
|
361
|
+
char *term;
|
362
|
+
double num;
|
363
|
+
TermEnum* te;
|
364
|
+
TermDocEnum *tde;
|
365
|
+
TypedRangeCheck check = TRC_NONE;
|
366
|
+
|
367
|
+
te = ir->terms(ir, field_num);
|
368
|
+
if (te->skip_to(te, "+.") == NULL) {
|
369
|
+
te->close(te);
|
370
|
+
return bv;
|
371
|
+
}
|
372
|
+
|
373
|
+
tde = ir->term_docs(ir);
|
374
|
+
term = te->curr_term;
|
375
|
+
|
376
|
+
if (lt) {
|
377
|
+
check = range->include_lower ? TRC_GE : TRC_GT;
|
378
|
+
}
|
379
|
+
if (ut) {
|
380
|
+
check = (TypedRangeCheck)(check | (range->include_upper
|
381
|
+
? TRC_LE
|
382
|
+
: TRC_LT));
|
383
|
+
}
|
384
|
+
|
385
|
+
switch(check) {
|
386
|
+
case TRC_LE:
|
387
|
+
SET_DOCS(num <= unum);
|
388
|
+
break;
|
389
|
+
case TRC_LT:
|
390
|
+
SET_DOCS(num < unum);
|
391
|
+
break;
|
392
|
+
case TRC_GE:
|
393
|
+
SET_DOCS(num >= lnum);
|
394
|
+
break;
|
395
|
+
case TRC_GE_LE:
|
396
|
+
SET_DOCS(num >= lnum && num <= unum);
|
397
|
+
break;
|
398
|
+
case TRC_GE_LT:
|
399
|
+
SET_DOCS(num >= lnum && num < unum);
|
400
|
+
break;
|
401
|
+
case TRC_GT:
|
402
|
+
SET_DOCS(num > lnum);
|
403
|
+
break;
|
404
|
+
case TRC_GT_LE:
|
405
|
+
SET_DOCS(num > lnum && num <= unum);
|
406
|
+
break;
|
407
|
+
case TRC_GT_LT:
|
408
|
+
SET_DOCS(num > lnum && num < unum);
|
409
|
+
break;
|
410
|
+
case TRC_NONE:
|
411
|
+
/* should never happen. Error should have been raised */
|
412
|
+
assert(false);
|
413
|
+
}
|
414
|
+
tde->close(tde);
|
415
|
+
te->close(te);
|
416
|
+
}
|
417
|
+
|
418
|
+
return bv;
|
419
|
+
}
|
420
|
+
else {
|
421
|
+
return rfilt_get_bv_i(filt, ir);
|
422
|
+
}
|
423
|
+
}
|
424
|
+
|
425
|
+
Filter *trfilt_new(Symbol field,
|
426
|
+
const char *lower_term, const char *upper_term,
|
427
|
+
bool include_lower, bool include_upper)
|
428
|
+
{
|
429
|
+
Filter *filt = filt_new(RangeFilter);
|
430
|
+
RF(filt)->range = trange_new(field, lower_term, upper_term,
|
431
|
+
include_lower, include_upper);
|
432
|
+
|
433
|
+
filt->get_bv_i = &trfilt_get_bv_i;
|
434
|
+
filt->hash = &rfilt_hash;
|
435
|
+
filt->eq = &rfilt_eq;
|
436
|
+
filt->to_s = &trfilt_to_s;
|
437
|
+
filt->destroy_i = &rfilt_destroy_i;
|
438
|
+
return filt;
|
439
|
+
}
|
440
|
+
|
248
441
|
/*****************************************************************************
|
249
442
|
*
|
250
443
|
* RangeQuery
|
@@ -258,7 +451,7 @@ typedef struct RangeQuery
|
|
258
451
|
Range *range;
|
259
452
|
} RangeQuery;
|
260
453
|
|
261
|
-
static char *rq_to_s(Query *self,
|
454
|
+
static char *rq_to_s(Query *self, Symbol field)
|
262
455
|
{
|
263
456
|
return range_to_s(RQ(self)->range, field, self->boost);
|
264
457
|
}
|
@@ -273,23 +466,29 @@ static MatchVector *rq_get_matchv_i(Query *self, MatchVector *mv,
|
|
273
466
|
TermVector *tv)
|
274
467
|
{
|
275
468
|
Range *range = RQ(((ConstantScoreQuery *)self)->original)->range;
|
276
|
-
if (
|
469
|
+
if (tv->field == range->field) {
|
470
|
+
const int term_cnt = tv->term_cnt;
|
277
471
|
int i, j;
|
278
472
|
char *upper_text = range->upper_term;
|
279
473
|
char *lower_text = range->lower_term;
|
280
474
|
int upper_limit = range->include_upper ? 1 : 0;
|
281
|
-
int lower_limit = range->include_lower ? 1 : 0;
|
282
475
|
|
283
|
-
|
476
|
+
i = lower_text ? tv_scan_to_term_index(tv, lower_text) : 0;
|
477
|
+
if (i < term_cnt && !range->include_lower && lower_text
|
478
|
+
&& 0 == strcmp(lower_text, tv->terms[i].text)) {
|
479
|
+
i++;
|
480
|
+
}
|
481
|
+
|
482
|
+
for (; i < term_cnt; i++) {
|
284
483
|
TVTerm *tv_term = &(tv->terms[i]);
|
285
484
|
char *text = tv_term->text;
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
485
|
+
const int tv_term_freq = tv_term->freq;
|
486
|
+
if (upper_text && strcmp(text, upper_text) >= upper_limit) {
|
487
|
+
break;
|
488
|
+
}
|
489
|
+
for (j = 0; j < tv_term_freq; j++) {
|
490
|
+
int pos = tv_term->positions[j];
|
491
|
+
matchv_add(mv, pos, pos);
|
293
492
|
}
|
294
493
|
}
|
295
494
|
}
|
@@ -319,25 +518,26 @@ static int rq_eq(Query *self, Query *o)
|
|
319
518
|
return range_eq(RQ(self)->range, RQ(o)->range);
|
320
519
|
}
|
321
520
|
|
322
|
-
Query *rq_new_less(
|
521
|
+
Query *rq_new_less(Symbol field, const char *upper_term,
|
323
522
|
bool include_upper)
|
324
523
|
{
|
325
524
|
return rq_new(field, NULL, upper_term, false, include_upper);
|
326
525
|
}
|
327
526
|
|
328
|
-
Query *rq_new_more(
|
527
|
+
Query *rq_new_more(Symbol field, const char *lower_term,
|
329
528
|
bool include_lower)
|
330
529
|
{
|
331
530
|
return rq_new(field, lower_term, NULL, include_lower, false);
|
332
531
|
}
|
333
532
|
|
334
|
-
Query *rq_new(
|
533
|
+
Query *rq_new(Symbol field, const char *lower_term,
|
335
534
|
const char *upper_term, bool include_lower, bool include_upper)
|
336
535
|
{
|
337
|
-
Query *self
|
338
|
-
|
339
|
-
|
340
|
-
|
536
|
+
Query *self;
|
537
|
+
Range *range = range_new(field, lower_term, upper_term,
|
538
|
+
include_lower, include_upper);
|
539
|
+
self = q_new(RangeQuery);
|
540
|
+
RQ(self)->range = range;
|
341
541
|
|
342
542
|
self->type = RANGE_QUERY;
|
343
543
|
self->rewrite = &rq_rewrite;
|
@@ -348,3 +548,135 @@ Query *rq_new(const char *field, const char *lower_term,
|
|
348
548
|
self->create_weight_i = &q_create_weight_unsup;
|
349
549
|
return self;
|
350
550
|
}
|
551
|
+
|
552
|
+
/*****************************************************************************
|
553
|
+
*
|
554
|
+
* TypedRangeQuery
|
555
|
+
*
|
556
|
+
*****************************************************************************/
|
557
|
+
|
558
|
+
#define SET_TERMS(cond)\
|
559
|
+
for (i = tv->term_cnt - 1; i >= 0; i--) {\
|
560
|
+
TVTerm *tv_term = &(tv->terms[i]);\
|
561
|
+
char *text = tv_term->text;\
|
562
|
+
double num;\
|
563
|
+
sscanf(text, "%lg%n", &num, &len);\
|
564
|
+
if ((int)strlen(text) == len) { /* We have a number */\
|
565
|
+
if (cond) {\
|
566
|
+
const int tv_term_freq = tv_term->freq;\
|
567
|
+
for (j = 0; j < tv_term_freq; j++) {\
|
568
|
+
int pos = tv_term->positions[j];\
|
569
|
+
matchv_add(mv, pos, pos);\
|
570
|
+
}\
|
571
|
+
}\
|
572
|
+
}\
|
573
|
+
}\
|
574
|
+
|
575
|
+
static MatchVector *trq_get_matchv_i(Query *self, MatchVector *mv,
|
576
|
+
TermVector *tv)
|
577
|
+
{
|
578
|
+
Range *range = RQ(((ConstantScoreQuery *)self)->original)->range;
|
579
|
+
if (tv->field == range->field) {
|
580
|
+
double lnum = 0.0, unum = 0.0;
|
581
|
+
int len = 0;
|
582
|
+
const char *lt = range->lower_term;
|
583
|
+
const char *ut = range->upper_term;
|
584
|
+
if ((!lt
|
585
|
+
|| (sscanf(lt,"%lg%n",&lnum,&len) && (int)strlen(lt) == len))
|
586
|
+
&&
|
587
|
+
(!ut
|
588
|
+
|| (sscanf(ut,"%lg%n",&unum,&len) && (int)strlen(ut) == len)))
|
589
|
+
{
|
590
|
+
TypedRangeCheck check = TRC_NONE;
|
591
|
+
int i = 0, j = 0;
|
592
|
+
|
593
|
+
if (lt) {
|
594
|
+
check = range->include_lower ? TRC_GE : TRC_GT;
|
595
|
+
}
|
596
|
+
if (ut) {
|
597
|
+
check = (TypedRangeCheck)(check | (range->include_upper
|
598
|
+
? TRC_LE
|
599
|
+
: TRC_LT));
|
600
|
+
}
|
601
|
+
|
602
|
+
switch(check) {
|
603
|
+
case TRC_LE:
|
604
|
+
SET_TERMS(num <= unum);
|
605
|
+
break;
|
606
|
+
case TRC_LT:
|
607
|
+
SET_TERMS(num < unum);
|
608
|
+
break;
|
609
|
+
case TRC_GE:
|
610
|
+
SET_TERMS(num >= lnum);
|
611
|
+
break;
|
612
|
+
case TRC_GE_LE:
|
613
|
+
SET_TERMS(num >= lnum && num <= unum);
|
614
|
+
break;
|
615
|
+
case TRC_GE_LT:
|
616
|
+
SET_TERMS(num >= lnum && num < unum);
|
617
|
+
break;
|
618
|
+
case TRC_GT:
|
619
|
+
SET_TERMS(num > lnum);
|
620
|
+
break;
|
621
|
+
case TRC_GT_LE:
|
622
|
+
SET_TERMS(num > lnum && num <= unum);
|
623
|
+
break;
|
624
|
+
case TRC_GT_LT:
|
625
|
+
SET_TERMS(num > lnum && num < unum);
|
626
|
+
break;
|
627
|
+
case TRC_NONE:
|
628
|
+
/* should never happen. Error should have been raised */
|
629
|
+
assert(false);
|
630
|
+
}
|
631
|
+
|
632
|
+
}
|
633
|
+
else {
|
634
|
+
return rq_get_matchv_i(self, mv, tv);
|
635
|
+
}
|
636
|
+
}
|
637
|
+
return mv;
|
638
|
+
}
|
639
|
+
|
640
|
+
static Query *trq_rewrite(Query *self, IndexReader *ir)
|
641
|
+
{
|
642
|
+
Query *csq;
|
643
|
+
Range *r = RQ(self)->range;
|
644
|
+
Filter *filter = trfilt_new(r->field, r->lower_term, r->upper_term,
|
645
|
+
r->include_lower, r->include_upper);
|
646
|
+
(void)ir;
|
647
|
+
csq = csq_new_nr(filter);
|
648
|
+
((ConstantScoreQuery *)csq)->original = self;
|
649
|
+
csq->get_matchv_i = &trq_get_matchv_i;
|
650
|
+
return (Query *)csq;
|
651
|
+
}
|
652
|
+
|
653
|
+
Query *trq_new_less(Symbol field, const char *upper_term,
|
654
|
+
bool include_upper)
|
655
|
+
{
|
656
|
+
return trq_new(field, NULL, upper_term, false, include_upper);
|
657
|
+
}
|
658
|
+
|
659
|
+
Query *trq_new_more(Symbol field, const char *lower_term,
|
660
|
+
bool include_lower)
|
661
|
+
{
|
662
|
+
return trq_new(field, lower_term, NULL, include_lower, false);
|
663
|
+
}
|
664
|
+
|
665
|
+
Query *trq_new(Symbol field, const char *lower_term,
|
666
|
+
const char *upper_term, bool include_lower, bool include_upper)
|
667
|
+
{
|
668
|
+
Query *self;
|
669
|
+
Range *range = trange_new(field, lower_term, upper_term,
|
670
|
+
include_lower, include_upper);
|
671
|
+
self = q_new(RangeQuery);
|
672
|
+
RQ(self)->range = range;
|
673
|
+
|
674
|
+
self->type = TYPED_RANGE_QUERY;
|
675
|
+
self->rewrite = &trq_rewrite;
|
676
|
+
self->to_s = &rq_to_s;
|
677
|
+
self->hash = &rq_hash;
|
678
|
+
self->eq = &rq_eq;
|
679
|
+
self->destroy_i = &rq_destroy;
|
680
|
+
self->create_weight_i = &q_create_weight_unsup;
|
681
|
+
return self;
|
682
|
+
}
|