isomorfeus-ferret 0.12.7 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +54 -1
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -4,6 +4,8 @@
|
|
4
4
|
#include "frt_search.h"
|
5
5
|
#include "frt_array.h"
|
6
6
|
|
7
|
+
#undef close
|
8
|
+
|
7
9
|
#define PhQ(query) ((FrtPhraseQuery *)(query))
|
8
10
|
|
9
11
|
/**
|
@@ -13,8 +15,7 @@
|
|
13
15
|
* consistant order of positions when testing. Functionally it makes no
|
14
16
|
* difference.
|
15
17
|
*/
|
16
|
-
static int phrase_pos_cmp(const void *p1, const void *p2)
|
17
|
-
{
|
18
|
+
static int phrase_pos_cmp(const void *p1, const void *p2) {
|
18
19
|
int pos1 = ((FrtPhrasePosition *)p1)->pos;
|
19
20
|
int pos2 = ((FrtPhrasePosition *)p2)->pos;
|
20
21
|
if (pos1 > pos2) {
|
@@ -38,17 +39,15 @@ static int phrase_pos_cmp(const void *p1, const void *p2)
|
|
38
39
|
***************************************************************************/
|
39
40
|
|
40
41
|
#define PP(p) ((PhPos *)(p))
|
41
|
-
typedef struct PhPos
|
42
|
-
{
|
42
|
+
typedef struct PhPos {
|
43
43
|
FrtTermDocEnum *tpe;
|
44
|
-
int
|
45
|
-
int
|
46
|
-
int
|
47
|
-
int
|
44
|
+
int offset;
|
45
|
+
int count;
|
46
|
+
int doc;
|
47
|
+
int position;
|
48
48
|
} PhPos;
|
49
49
|
|
50
|
-
static bool pp_next(PhPos *self)
|
51
|
-
{
|
50
|
+
static bool pp_next(PhPos *self) {
|
52
51
|
FrtTermDocEnum *tpe = self->tpe;
|
53
52
|
assert(tpe);
|
54
53
|
|
@@ -590,7 +589,7 @@ static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_
|
|
590
589
|
return frt_expl_new(0.0, "field \"%s\" does not exist in the index", field_name);
|
591
590
|
}
|
592
591
|
|
593
|
-
query_str = self->query->to_s(self->query, (
|
592
|
+
query_str = self->query->to_s(self->query, (ID)NULL);
|
594
593
|
|
595
594
|
expl = frt_expl_new(0.0, "weight(%s in %d), product of:", query_str, doc_num);
|
596
595
|
|
@@ -931,8 +930,7 @@ static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set)
|
|
931
930
|
}
|
932
931
|
}
|
933
932
|
|
934
|
-
static char *phq_to_s(FrtQuery *self,
|
935
|
-
{
|
933
|
+
static char *phq_to_s(FrtQuery *self, ID default_field) {
|
936
934
|
FrtPhraseQuery *phq = PhQ(self);
|
937
935
|
const int pos_cnt = phq->pos_cnt;
|
938
936
|
FrtPhrasePosition *positions = phq->positions;
|
@@ -1110,10 +1108,11 @@ static int phq_eq(FrtQuery *self, FrtQuery *o)
|
|
1110
1108
|
return true;
|
1111
1109
|
}
|
1112
1110
|
|
1113
|
-
FrtQuery *
|
1114
|
-
|
1115
|
-
|
1111
|
+
FrtQuery *frt_phq_alloc(void) {
|
1112
|
+
return frt_q_new(FrtPhraseQuery);
|
1113
|
+
}
|
1116
1114
|
|
1115
|
+
FrtQuery *frt_phq_init(FrtQuery *self, ID field) {
|
1117
1116
|
PhQ(self)->field = field;
|
1118
1117
|
PhQ(self)->pos_cnt = 0;
|
1119
1118
|
PhQ(self)->pos_capa = PhQ_INIT_CAPA;
|
@@ -1131,6 +1130,11 @@ FrtQuery *frt_phq_new(FrtSymbol field)
|
|
1131
1130
|
return self;
|
1132
1131
|
}
|
1133
1132
|
|
1133
|
+
FrtQuery *frt_phq_new(ID field) {
|
1134
|
+
FrtQuery *self = frt_phq_alloc();
|
1135
|
+
return frt_phq_init(self, field);
|
1136
|
+
}
|
1137
|
+
|
1134
1138
|
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
1135
1139
|
{
|
1136
1140
|
FrtPhraseQuery *phq = PhQ(self);
|
@@ -2,6 +2,8 @@
|
|
2
2
|
#include "frt_global.h"
|
3
3
|
#include "frt_search.h"
|
4
4
|
|
5
|
+
#undef close
|
6
|
+
|
5
7
|
/****************************************************************************
|
6
8
|
*
|
7
9
|
* FrtPrefixQuery
|
@@ -10,8 +12,7 @@
|
|
10
12
|
|
11
13
|
#define PfxQ(query) ((FrtPrefixQuery *)(query))
|
12
14
|
|
13
|
-
static char *prq_to_s(FrtQuery *self,
|
14
|
-
{
|
15
|
+
static char *prq_to_s(FrtQuery *self, ID default_field) {
|
15
16
|
char *buffer, *bptr;
|
16
17
|
const char *prefix = PfxQ(self)->prefix;
|
17
18
|
size_t plen = strlen(prefix);
|
@@ -33,8 +34,7 @@ static char *prq_to_s(FrtQuery *self, FrtSymbol default_field)
|
|
33
34
|
return buffer;
|
34
35
|
}
|
35
36
|
|
36
|
-
static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
37
|
-
{
|
37
|
+
static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
38
38
|
const int field_num = frt_fis_get_field_num(ir->fis, PfxQ(self)->field);
|
39
39
|
FrtQuery *volatile q = frt_multi_tq_new_conf(PfxQ(self)->field,
|
40
40
|
FrtMTQMaxTerms(self), 0.0);
|
@@ -61,30 +61,28 @@ static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
61
61
|
return q;
|
62
62
|
}
|
63
63
|
|
64
|
-
static void prq_destroy(FrtQuery *self)
|
65
|
-
{
|
64
|
+
static void prq_destroy(FrtQuery *self) {
|
66
65
|
free(PfxQ(self)->prefix);
|
67
66
|
frt_q_destroy_i(self);
|
68
67
|
}
|
69
68
|
|
70
|
-
static unsigned long long prq_hash(FrtQuery *self)
|
71
|
-
{
|
69
|
+
static unsigned long long prq_hash(FrtQuery *self) {
|
72
70
|
return frt_str_hash(rb_id2name(PfxQ(self)->field)) ^ frt_str_hash(PfxQ(self)->prefix);
|
73
71
|
}
|
74
72
|
|
75
|
-
static int prq_eq(FrtQuery *self, FrtQuery *o)
|
76
|
-
{
|
73
|
+
static int prq_eq(FrtQuery *self, FrtQuery *o) {
|
77
74
|
return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
|
78
75
|
&& (PfxQ(self)->field == PfxQ(o)->field);
|
79
76
|
}
|
80
77
|
|
81
|
-
FrtQuery *
|
82
|
-
|
83
|
-
|
78
|
+
FrtQuery *frt_prefixq_alloc(void) {
|
79
|
+
return frt_q_new(FrtPrefixQuery);
|
80
|
+
}
|
84
81
|
|
82
|
+
FrtQuery *frt_prefixq_init(FrtQuery *self, ID field, const char *prefix) {
|
85
83
|
PfxQ(self)->field = field;
|
86
84
|
PfxQ(self)->prefix = frt_estrdup(prefix);
|
87
|
-
FrtMTQMaxTerms(self)
|
85
|
+
FrtMTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
|
88
86
|
|
89
87
|
self->type = PREFIX_QUERY;
|
90
88
|
self->rewrite = &prq_rewrite;
|
@@ -96,3 +94,8 @@ FrtQuery *frt_prefixq_new(FrtSymbol field, const char *prefix)
|
|
96
94
|
|
97
95
|
return self;
|
98
96
|
}
|
97
|
+
|
98
|
+
FrtQuery *frt_prefixq_new(ID field, const char *prefix) {
|
99
|
+
FrtQuery *self = frt_prefixq_alloc();
|
100
|
+
return frt_prefixq_init(self, field, prefix);
|
101
|
+
}
|
@@ -2,23 +2,15 @@
|
|
2
2
|
#include "frt_global.h"
|
3
3
|
#include "frt_search.h"
|
4
4
|
|
5
|
+
#undef close
|
6
|
+
|
5
7
|
/*****************************************************************************
|
6
8
|
*
|
7
|
-
*
|
9
|
+
* FrtRange
|
8
10
|
*
|
9
11
|
*****************************************************************************/
|
10
12
|
|
11
|
-
|
12
|
-
{
|
13
|
-
FrtSymbol field;
|
14
|
-
char *lower_term;
|
15
|
-
char *upper_term;
|
16
|
-
bool include_lower : 1;
|
17
|
-
bool include_upper : 1;
|
18
|
-
} Range;
|
19
|
-
|
20
|
-
static char *range_to_s(Range *range, FrtSymbol default_field, float boost)
|
21
|
-
{
|
13
|
+
static char *range_to_s(FrtRange *range, ID default_field, float boost) {
|
22
14
|
char *buffer, *b;
|
23
15
|
size_t flen, llen, ulen;
|
24
16
|
const char *field_name = rb_id2name(range->field);
|
@@ -68,14 +60,14 @@ static char *range_to_s(Range *range, FrtSymbol default_field, float boost)
|
|
68
60
|
return buffer;
|
69
61
|
}
|
70
62
|
|
71
|
-
static void range_destroy(
|
63
|
+
static void range_destroy(FrtRange *range)
|
72
64
|
{
|
73
65
|
free(range->lower_term);
|
74
66
|
free(range->upper_term);
|
75
67
|
free(range);
|
76
68
|
}
|
77
69
|
|
78
|
-
static unsigned long long range_hash(
|
70
|
+
static unsigned long long range_hash(FrtRange *filt)
|
79
71
|
{
|
80
72
|
return filt->include_lower | (filt->include_upper << 1)
|
81
73
|
| ((frt_str_hash(rb_id2name(filt->field))
|
@@ -83,7 +75,7 @@ static unsigned long long range_hash(Range *filt)
|
|
83
75
|
^ (filt->upper_term ? frt_str_hash(filt->upper_term) : 0)) << 2);
|
84
76
|
}
|
85
77
|
|
86
|
-
static int range_eq(
|
78
|
+
static int range_eq(FrtRange *filt, FrtRange *o)
|
87
79
|
{
|
88
80
|
if ((filt->lower_term && !o->lower_term) || (!filt->lower_term && o->lower_term)) { return false; }
|
89
81
|
if ((filt->upper_term && !o->upper_term) || (!filt->upper_term && o->upper_term)) { return false; }
|
@@ -94,11 +86,8 @@ static int range_eq(Range *filt, Range *o)
|
|
94
86
|
&& (filt->include_upper == o->include_upper));
|
95
87
|
}
|
96
88
|
|
97
|
-
static
|
98
|
-
|
99
|
-
bool include_upper)
|
100
|
-
{
|
101
|
-
Range *range;
|
89
|
+
static FrtRange *range_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
90
|
+
FrtRange *range;
|
102
91
|
|
103
92
|
if (!lower_term && !upper_term) {
|
104
93
|
FRT_RAISE(FRT_ARG_ERROR, "Nil bounds for range. A range must include either "
|
@@ -119,7 +108,7 @@ static Range *range_new(FrtSymbol field, const char *lower_term,
|
|
119
108
|
"\"%s\" < \"%s\"", upper_term, lower_term);
|
120
109
|
}
|
121
110
|
|
122
|
-
range = FRT_ALLOC(
|
111
|
+
range = FRT_ALLOC(FrtRange);
|
123
112
|
|
124
113
|
range->field = field;
|
125
114
|
range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
|
@@ -129,11 +118,8 @@ static Range *range_new(FrtSymbol field, const char *lower_term,
|
|
129
118
|
return range;
|
130
119
|
}
|
131
120
|
|
132
|
-
static
|
133
|
-
|
134
|
-
bool include_upper)
|
135
|
-
{
|
136
|
-
Range *range;
|
121
|
+
static FrtRange *trange_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
122
|
+
FrtRange *range;
|
137
123
|
int len;
|
138
124
|
double upper_num, lower_num;
|
139
125
|
|
@@ -173,7 +159,7 @@ static Range *trange_new(FrtSymbol field, const char *lower_term,
|
|
173
159
|
}
|
174
160
|
}
|
175
161
|
|
176
|
-
range = FRT_ALLOC(
|
162
|
+
range = FRT_ALLOC(FrtRange);
|
177
163
|
|
178
164
|
range->field = field;
|
179
165
|
range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
|
@@ -185,27 +171,20 @@ static Range *trange_new(FrtSymbol field, const char *lower_term,
|
|
185
171
|
|
186
172
|
/***************************************************************************
|
187
173
|
*
|
188
|
-
*
|
174
|
+
* FrtRangeFilter
|
189
175
|
*
|
190
176
|
***************************************************************************/
|
191
177
|
|
192
|
-
|
193
|
-
{
|
194
|
-
FrtFilter super;
|
195
|
-
Range *range;
|
196
|
-
} RangeFilter;
|
178
|
+
#define RF(filt) ((FrtRangeFilter *)(filt))
|
197
179
|
|
198
|
-
|
199
|
-
|
200
|
-
static void frt_rfilt_destroy_i(FrtFilter *filt)
|
201
|
-
{
|
180
|
+
static void frt_rfilt_destroy_i(FrtFilter *filt) {
|
202
181
|
range_destroy(RF(filt)->range);
|
203
182
|
frt_filt_destroy_i(filt);
|
204
183
|
}
|
205
184
|
|
206
185
|
static char *frt_rfilt_to_s(FrtFilter *filt)
|
207
186
|
{
|
208
|
-
char *rstr = range_to_s(RF(filt)->range, (
|
187
|
+
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
209
188
|
char *rfstr = frt_strfmt("RangeFilter< %s >", rstr);
|
210
189
|
free(rstr);
|
211
190
|
return rfstr;
|
@@ -214,7 +193,7 @@ static char *frt_rfilt_to_s(FrtFilter *filt)
|
|
214
193
|
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
215
194
|
{
|
216
195
|
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
|
217
|
-
|
196
|
+
FrtRange *range = RF(filt)->range;
|
218
197
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|
219
198
|
/* the field info exists we need to add docs to the bit vector, otherwise
|
220
199
|
* we just return an empty bit vector */
|
@@ -278,13 +257,12 @@ static int frt_rfilt_eq(FrtFilter *filt, FrtFilter *o) {
|
|
278
257
|
return range_eq(RF(filt)->range, RF(o)->range);
|
279
258
|
}
|
280
259
|
|
281
|
-
FrtFilter *
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
RF(filt)->range =
|
287
|
-
include_lower, include_upper);
|
260
|
+
FrtFilter *frt_rfilt_alloc(void) {
|
261
|
+
return filt_new(FrtRangeFilter);
|
262
|
+
}
|
263
|
+
|
264
|
+
FrtFilter *frt_rfilt_init(FrtFilter *filt, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
265
|
+
RF(filt)->range = range_new(field, lower_term, upper_term, include_lower, include_upper);
|
288
266
|
filt->get_bv_i = &frt_rfilt_get_bv_i;
|
289
267
|
filt->hash = &frt_rfilt_hash;
|
290
268
|
filt->eq = &frt_rfilt_eq;
|
@@ -293,15 +271,19 @@ FrtFilter *frt_rfilt_new(FrtSymbol field,
|
|
293
271
|
return filt;
|
294
272
|
}
|
295
273
|
|
274
|
+
FrtFilter *frt_rfilt_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
275
|
+
FrtFilter *filt = frt_rfilt_alloc();
|
276
|
+
return frt_rfilt_init(filt, field, lower_term, upper_term, include_lower, include_upper);
|
277
|
+
}
|
278
|
+
|
296
279
|
/***************************************************************************
|
297
280
|
*
|
298
|
-
*
|
281
|
+
* FrtRangeFilter
|
299
282
|
*
|
300
283
|
***************************************************************************/
|
301
284
|
|
302
|
-
static char *frt_trfilt_to_s(FrtFilter *filt)
|
303
|
-
|
304
|
-
char *rstr = range_to_s(RF(filt)->range, (FrtSymbol)NULL, 1.0);
|
285
|
+
static char *frt_trfilt_to_s(FrtFilter *filt) {
|
286
|
+
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
305
287
|
char *rfstr = frt_strfmt("TypedRangeFilter< %s >", rstr);
|
306
288
|
free(rstr);
|
307
289
|
return rfstr;
|
@@ -333,10 +315,8 @@ do {\
|
|
333
315
|
}\
|
334
316
|
} while (te->next(te))
|
335
317
|
|
336
|
-
|
337
|
-
|
338
|
-
{
|
339
|
-
Range *range = RF(filt)->range;
|
318
|
+
static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
319
|
+
FrtRange *range = RF(filt)->range;
|
340
320
|
double lnum = 0.0, unum = 0.0;
|
341
321
|
int len = 0;
|
342
322
|
const char *lt = range->lower_term;
|
@@ -408,19 +388,17 @@ static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
|
408
388
|
}
|
409
389
|
|
410
390
|
return bv;
|
411
|
-
}
|
412
|
-
else {
|
391
|
+
} else {
|
413
392
|
return frt_rfilt_get_bv_i(filt, ir);
|
414
393
|
}
|
415
394
|
}
|
416
395
|
|
417
|
-
FrtFilter *
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
RF(filt)->range =
|
423
|
-
include_lower, include_upper);
|
396
|
+
FrtFilter *frt_trfilt_alloc(void) {
|
397
|
+
return filt_new(FrtRangeFilter);
|
398
|
+
}
|
399
|
+
|
400
|
+
FrtFilter *frt_trfilt_init(FrtFilter *filt, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
401
|
+
RF(filt)->range = trange_new(field, lower_term, upper_term, include_lower, include_upper);
|
424
402
|
|
425
403
|
filt->get_bv_i = &frt_trfilt_get_bv_i;
|
426
404
|
filt->hash = &frt_rfilt_hash;
|
@@ -430,6 +408,11 @@ FrtFilter *frt_trfilt_new(FrtSymbol field,
|
|
430
408
|
return filt;
|
431
409
|
}
|
432
410
|
|
411
|
+
FrtFilter *frt_trfilt_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
412
|
+
FrtFilter *filt = frt_trfilt_alloc();
|
413
|
+
return frt_trfilt_init(filt, field, lower_term, upper_term, include_lower, include_upper);
|
414
|
+
}
|
415
|
+
|
433
416
|
/*****************************************************************************
|
434
417
|
*
|
435
418
|
* RangeQuery
|
@@ -437,27 +420,18 @@ FrtFilter *frt_trfilt_new(FrtSymbol field,
|
|
437
420
|
*****************************************************************************/
|
438
421
|
|
439
422
|
#define RQ(query) ((FrtRangeQuery *)(query))
|
440
|
-
typedef struct FrtRangeQuery
|
441
|
-
{
|
442
|
-
FrtQuery f;
|
443
|
-
Range *range;
|
444
|
-
} FrtRangeQuery;
|
445
423
|
|
446
|
-
static char *frt_rq_to_s(FrtQuery *self,
|
447
|
-
{
|
424
|
+
static char *frt_rq_to_s(FrtQuery *self, ID field) {
|
448
425
|
return range_to_s(RQ(self)->range, field, self->boost);
|
449
426
|
}
|
450
427
|
|
451
|
-
static void frt_rq_destroy(FrtQuery *self)
|
452
|
-
{
|
428
|
+
static void frt_rq_destroy(FrtQuery *self) {
|
453
429
|
range_destroy(RQ(self)->range);
|
454
430
|
frt_q_destroy_i(self);
|
455
431
|
}
|
456
432
|
|
457
|
-
static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
458
|
-
|
459
|
-
{
|
460
|
-
Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
433
|
+
static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
434
|
+
FrtRange *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
461
435
|
if (tv->field == range->field) {
|
462
436
|
const int term_cnt = tv->term_cnt;
|
463
437
|
int i, j;
|
@@ -487,12 +461,10 @@ static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
|
487
461
|
return mv;
|
488
462
|
}
|
489
463
|
|
490
|
-
static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
491
|
-
{
|
464
|
+
static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
492
465
|
FrtQuery *csq;
|
493
|
-
|
494
|
-
FrtFilter *filter = frt_rfilt_new(r->field, r->lower_term, r->upper_term,
|
495
|
-
r->include_lower, r->include_upper);
|
466
|
+
FrtRange *r = RQ(self)->range;
|
467
|
+
FrtFilter *filter = frt_rfilt_new(r->field, r->lower_term, r->upper_term, r->include_lower, r->include_upper);
|
496
468
|
(void)ir;
|
497
469
|
csq = frt_csq_new_nr(filter);
|
498
470
|
((FrtConstantScoreQuery *)csq)->original = self;
|
@@ -500,8 +472,7 @@ static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
500
472
|
return (FrtQuery *)csq;
|
501
473
|
}
|
502
474
|
|
503
|
-
static unsigned long long frt_rq_hash(FrtQuery *self)
|
504
|
-
{
|
475
|
+
static unsigned long long frt_rq_hash(FrtQuery *self) {
|
505
476
|
return range_hash(RQ(self)->range);
|
506
477
|
}
|
507
478
|
|
@@ -509,33 +480,36 @@ static int frt_rq_eq(FrtQuery *self, FrtQuery *o) {
|
|
509
480
|
return range_eq(RQ(self)->range, RQ(o)->range);
|
510
481
|
}
|
511
482
|
|
512
|
-
FrtQuery *frt_rq_new_less(
|
483
|
+
FrtQuery *frt_rq_new_less(ID field, const char *upper_term, bool include_upper) {
|
513
484
|
return frt_rq_new(field, NULL, upper_term, false, include_upper);
|
514
485
|
}
|
515
486
|
|
516
|
-
FrtQuery *frt_rq_new_more(
|
487
|
+
FrtQuery *frt_rq_new_more(ID field, const char *lower_term, bool include_lower) {
|
517
488
|
return frt_rq_new(field, lower_term, NULL, include_lower, false);
|
518
489
|
}
|
519
490
|
|
520
|
-
FrtQuery *
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
self
|
527
|
-
|
528
|
-
|
529
|
-
self->
|
530
|
-
self->
|
531
|
-
self->
|
532
|
-
self->
|
533
|
-
self->
|
534
|
-
self->destroy_i = &frt_rq_destroy;
|
535
|
-
self->create_weight_i = &frt_q_create_weight_unsup;
|
491
|
+
FrtQuery *frt_rq_alloc(void) {
|
492
|
+
return frt_q_new(FrtRangeQuery);
|
493
|
+
}
|
494
|
+
|
495
|
+
FrtQuery *frt_rq_init(FrtQuery *self, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
496
|
+
FrtRange *range = range_new(field, lower_term, upper_term, include_lower, include_upper);
|
497
|
+
RQ(self)->range = range;
|
498
|
+
self->type = RANGE_QUERY;
|
499
|
+
self->rewrite = &frt_rq_rewrite;
|
500
|
+
self->to_s = &frt_rq_to_s;
|
501
|
+
self->hash = &frt_rq_hash;
|
502
|
+
self->eq = &frt_rq_eq;
|
503
|
+
self->destroy_i = &frt_rq_destroy;
|
504
|
+
self->create_weight_i = &frt_q_create_weight_unsup;
|
536
505
|
return self;
|
537
506
|
}
|
538
507
|
|
508
|
+
FrtQuery *frt_rq_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
509
|
+
FrtQuery *self = frt_rq_alloc();
|
510
|
+
return frt_rq_init(self, field, lower_term, upper_term, include_lower, include_upper);
|
511
|
+
}
|
512
|
+
|
539
513
|
/*****************************************************************************
|
540
514
|
*
|
541
515
|
* TypedRangeQuery
|
@@ -559,10 +533,8 @@ for (i = tv->term_cnt - 1; i >= 0; i--) {\
|
|
559
533
|
}\
|
560
534
|
}\
|
561
535
|
|
562
|
-
static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
563
|
-
|
564
|
-
{
|
565
|
-
Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
536
|
+
static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
537
|
+
FrtRange *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
566
538
|
if (tv->field == range->field) {
|
567
539
|
double lnum = 0.0, unum = 0.0;
|
568
540
|
int len = 0;
|
@@ -615,21 +587,17 @@ static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
|
615
587
|
/* should never happen. Error should have been rb_raised */
|
616
588
|
assert(false);
|
617
589
|
}
|
618
|
-
|
619
|
-
}
|
620
|
-
else {
|
590
|
+
} else {
|
621
591
|
return rq_get_matchv_i(self, mv, tv);
|
622
592
|
}
|
623
593
|
}
|
624
594
|
return mv;
|
625
595
|
}
|
626
596
|
|
627
|
-
static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
628
|
-
{
|
597
|
+
static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
629
598
|
FrtQuery *csq;
|
630
|
-
|
631
|
-
FrtFilter *filter = frt_trfilt_new(r->field, r->lower_term, r->upper_term,
|
632
|
-
r->include_lower, r->include_upper);
|
599
|
+
FrtRange *r = RQ(self)->range;
|
600
|
+
FrtFilter *filter = frt_trfilt_new(r->field, r->lower_term, r->upper_term, r->include_lower, r->include_upper);
|
633
601
|
(void)ir;
|
634
602
|
csq = frt_csq_new_nr(filter);
|
635
603
|
((FrtConstantScoreQuery *)csq)->original = self;
|
@@ -637,29 +605,32 @@ static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
637
605
|
return (FrtQuery *)csq;
|
638
606
|
}
|
639
607
|
|
640
|
-
FrtQuery *frt_trq_new_less(
|
608
|
+
FrtQuery *frt_trq_new_less(ID field, const char *upper_term, bool include_upper) {
|
641
609
|
return frt_trq_new(field, NULL, upper_term, false, include_upper);
|
642
610
|
}
|
643
611
|
|
644
|
-
FrtQuery *frt_trq_new_more(
|
612
|
+
FrtQuery *frt_trq_new_more(ID field, const char *lower_term, bool include_lower) {
|
645
613
|
return frt_trq_new(field, lower_term, NULL, include_lower, false);
|
646
614
|
}
|
647
615
|
|
648
|
-
FrtQuery *
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
self
|
655
|
-
|
656
|
-
|
657
|
-
self->
|
658
|
-
self->
|
659
|
-
self->
|
660
|
-
self->
|
661
|
-
self->
|
662
|
-
self->destroy_i = &frt_rq_destroy;
|
663
|
-
self->create_weight_i = &frt_q_create_weight_unsup;
|
616
|
+
FrtQuery *frt_trq_alloc(void) {
|
617
|
+
return frt_q_new(FrtRangeQuery);
|
618
|
+
}
|
619
|
+
|
620
|
+
FrtQuery *frt_trq_init(FrtQuery *self, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
621
|
+
FrtRange *range = trange_new(field, lower_term, upper_term, include_lower, include_upper);
|
622
|
+
RQ(self)->range = range;
|
623
|
+
self->type = TYPED_RANGE_QUERY;
|
624
|
+
self->rewrite = &frt_trq_rewrite;
|
625
|
+
self->to_s = &frt_rq_to_s;
|
626
|
+
self->hash = &frt_rq_hash;
|
627
|
+
self->eq = &frt_rq_eq;
|
628
|
+
self->destroy_i = &frt_rq_destroy;
|
629
|
+
self->create_weight_i = &frt_q_create_weight_unsup;
|
664
630
|
return self;
|
665
631
|
}
|
632
|
+
|
633
|
+
FrtQuery *frt_trq_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
634
|
+
FrtQuery *self = frt_trq_alloc();
|
635
|
+
return frt_trq_init(self, field, lower_term, upper_term, include_lower, include_upper);
|
636
|
+
}
|