isomorfeus-ferret 0.12.7 → 0.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -13
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -4,6 +4,8 @@
|
|
4
4
|
#include "frt_search.h"
|
5
5
|
#include "frt_array.h"
|
6
6
|
|
7
|
+
#undef close
|
8
|
+
|
7
9
|
#define PhQ(query) ((FrtPhraseQuery *)(query))
|
8
10
|
|
9
11
|
/**
|
@@ -13,8 +15,7 @@
|
|
13
15
|
* consistent order of positions when testing. Functionally it makes no
|
14
16
|
* difference.
|
15
17
|
*/
|
16
|
-
static int phrase_pos_cmp(const void *p1, const void *p2)
|
17
|
-
{
|
18
|
+
static int phrase_pos_cmp(const void *p1, const void *p2) {
|
18
19
|
int pos1 = ((FrtPhrasePosition *)p1)->pos;
|
19
20
|
int pos2 = ((FrtPhrasePosition *)p2)->pos;
|
20
21
|
if (pos1 > pos2) {
|
@@ -38,17 +39,15 @@ static int phrase_pos_cmp(const void *p1, const void *p2)
|
|
38
39
|
***************************************************************************/
|
39
40
|
|
40
41
|
#define PP(p) ((PhPos *)(p))
|
41
|
-
typedef struct PhPos
|
42
|
-
{
|
42
|
+
typedef struct PhPos {
|
43
43
|
FrtTermDocEnum *tpe;
|
44
|
-
int
|
45
|
-
int
|
46
|
-
int
|
47
|
-
int
|
44
|
+
int offset;
|
45
|
+
int count;
|
46
|
+
int doc;
|
47
|
+
int position;
|
48
48
|
} PhPos;
|
49
49
|
|
50
|
-
static bool pp_next(PhPos *self)
|
51
|
-
{
|
50
|
+
static bool pp_next(PhPos *self) {
|
52
51
|
FrtTermDocEnum *tpe = self->tpe;
|
53
52
|
assert(tpe);
|
54
53
|
|
@@ -590,7 +589,7 @@ static FrtExplanation *phw_explain(FrtWeight *self, FrtIndexReader *ir, int doc_
|
|
590
589
|
return frt_expl_new(0.0, "field \"%s\" does not exist in the index", field_name);
|
591
590
|
}
|
592
591
|
|
593
|
-
query_str = self->query->to_s(self->query, (
|
592
|
+
query_str = self->query->to_s(self->query, (ID)NULL);
|
594
593
|
|
595
594
|
expl = frt_expl_new(0.0, "weight(%s in %d), product of:", query_str, doc_num);
|
596
595
|
|
@@ -931,8 +930,7 @@ static void phq_extract_terms(FrtQuery *self, FrtHashSet *term_set)
|
|
931
930
|
}
|
932
931
|
}
|
933
932
|
|
934
|
-
static char *phq_to_s(FrtQuery *self,
|
935
|
-
{
|
933
|
+
static char *phq_to_s(FrtQuery *self, ID default_field) {
|
936
934
|
FrtPhraseQuery *phq = PhQ(self);
|
937
935
|
const int pos_cnt = phq->pos_cnt;
|
938
936
|
FrtPhrasePosition *positions = phq->positions;
|
@@ -1110,10 +1108,11 @@ static int phq_eq(FrtQuery *self, FrtQuery *o)
|
|
1110
1108
|
return true;
|
1111
1109
|
}
|
1112
1110
|
|
1113
|
-
FrtQuery *
|
1114
|
-
|
1115
|
-
|
1111
|
+
FrtQuery *frt_phq_alloc(void) {
|
1112
|
+
return frt_q_new(FrtPhraseQuery);
|
1113
|
+
}
|
1116
1114
|
|
1115
|
+
FrtQuery *frt_phq_init(FrtQuery *self, ID field) {
|
1117
1116
|
PhQ(self)->field = field;
|
1118
1117
|
PhQ(self)->pos_cnt = 0;
|
1119
1118
|
PhQ(self)->pos_capa = PhQ_INIT_CAPA;
|
@@ -1131,6 +1130,11 @@ FrtQuery *frt_phq_new(FrtSymbol field)
|
|
1131
1130
|
return self;
|
1132
1131
|
}
|
1133
1132
|
|
1133
|
+
FrtQuery *frt_phq_new(ID field) {
|
1134
|
+
FrtQuery *self = frt_phq_alloc();
|
1135
|
+
return frt_phq_init(self, field);
|
1136
|
+
}
|
1137
|
+
|
1134
1138
|
void frt_phq_add_term_abs(FrtQuery *self, const char *term, int position)
|
1135
1139
|
{
|
1136
1140
|
FrtPhraseQuery *phq = PhQ(self);
|
@@ -2,6 +2,8 @@
|
|
2
2
|
#include "frt_global.h"
|
3
3
|
#include "frt_search.h"
|
4
4
|
|
5
|
+
#undef close
|
6
|
+
|
5
7
|
/****************************************************************************
|
6
8
|
*
|
7
9
|
* FrtPrefixQuery
|
@@ -10,8 +12,7 @@
|
|
10
12
|
|
11
13
|
#define PfxQ(query) ((FrtPrefixQuery *)(query))
|
12
14
|
|
13
|
-
static char *prq_to_s(FrtQuery *self,
|
14
|
-
{
|
15
|
+
static char *prq_to_s(FrtQuery *self, ID default_field) {
|
15
16
|
char *buffer, *bptr;
|
16
17
|
const char *prefix = PfxQ(self)->prefix;
|
17
18
|
size_t plen = strlen(prefix);
|
@@ -33,8 +34,7 @@ static char *prq_to_s(FrtQuery *self, FrtSymbol default_field)
|
|
33
34
|
return buffer;
|
34
35
|
}
|
35
36
|
|
36
|
-
static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
37
|
-
{
|
37
|
+
static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
38
38
|
const int field_num = frt_fis_get_field_num(ir->fis, PfxQ(self)->field);
|
39
39
|
FrtQuery *volatile q = frt_multi_tq_new_conf(PfxQ(self)->field,
|
40
40
|
FrtMTQMaxTerms(self), 0.0);
|
@@ -61,30 +61,28 @@ static FrtQuery *prq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
61
61
|
return q;
|
62
62
|
}
|
63
63
|
|
64
|
-
static void prq_destroy(FrtQuery *self)
|
65
|
-
{
|
64
|
+
static void prq_destroy(FrtQuery *self) {
|
66
65
|
free(PfxQ(self)->prefix);
|
67
66
|
frt_q_destroy_i(self);
|
68
67
|
}
|
69
68
|
|
70
|
-
static unsigned long long prq_hash(FrtQuery *self)
|
71
|
-
{
|
69
|
+
static unsigned long long prq_hash(FrtQuery *self) {
|
72
70
|
return frt_str_hash(rb_id2name(PfxQ(self)->field)) ^ frt_str_hash(PfxQ(self)->prefix);
|
73
71
|
}
|
74
72
|
|
75
|
-
static int prq_eq(FrtQuery *self, FrtQuery *o)
|
76
|
-
{
|
73
|
+
static int prq_eq(FrtQuery *self, FrtQuery *o) {
|
77
74
|
return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
|
78
75
|
&& (PfxQ(self)->field == PfxQ(o)->field);
|
79
76
|
}
|
80
77
|
|
81
|
-
FrtQuery *
|
82
|
-
|
83
|
-
|
78
|
+
FrtQuery *frt_prefixq_alloc(void) {
|
79
|
+
return frt_q_new(FrtPrefixQuery);
|
80
|
+
}
|
84
81
|
|
82
|
+
FrtQuery *frt_prefixq_init(FrtQuery *self, ID field, const char *prefix) {
|
85
83
|
PfxQ(self)->field = field;
|
86
84
|
PfxQ(self)->prefix = frt_estrdup(prefix);
|
87
|
-
FrtMTQMaxTerms(self)
|
85
|
+
FrtMTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
|
88
86
|
|
89
87
|
self->type = PREFIX_QUERY;
|
90
88
|
self->rewrite = &prq_rewrite;
|
@@ -96,3 +94,8 @@ FrtQuery *frt_prefixq_new(FrtSymbol field, const char *prefix)
|
|
96
94
|
|
97
95
|
return self;
|
98
96
|
}
|
97
|
+
|
98
|
+
FrtQuery *frt_prefixq_new(ID field, const char *prefix) {
|
99
|
+
FrtQuery *self = frt_prefixq_alloc();
|
100
|
+
return frt_prefixq_init(self, field, prefix);
|
101
|
+
}
|
@@ -2,23 +2,15 @@
|
|
2
2
|
#include "frt_global.h"
|
3
3
|
#include "frt_search.h"
|
4
4
|
|
5
|
+
#undef close
|
6
|
+
|
5
7
|
/*****************************************************************************
|
6
8
|
*
|
7
|
-
*
|
9
|
+
* FrtRange
|
8
10
|
*
|
9
11
|
*****************************************************************************/
|
10
12
|
|
11
|
-
|
12
|
-
{
|
13
|
-
FrtSymbol field;
|
14
|
-
char *lower_term;
|
15
|
-
char *upper_term;
|
16
|
-
bool include_lower : 1;
|
17
|
-
bool include_upper : 1;
|
18
|
-
} Range;
|
19
|
-
|
20
|
-
static char *range_to_s(Range *range, FrtSymbol default_field, float boost)
|
21
|
-
{
|
13
|
+
static char *range_to_s(FrtRange *range, ID default_field, float boost) {
|
22
14
|
char *buffer, *b;
|
23
15
|
size_t flen, llen, ulen;
|
24
16
|
const char *field_name = rb_id2name(range->field);
|
@@ -68,14 +60,14 @@ static char *range_to_s(Range *range, FrtSymbol default_field, float boost)
|
|
68
60
|
return buffer;
|
69
61
|
}
|
70
62
|
|
71
|
-
static void range_destroy(
|
63
|
+
static void range_destroy(FrtRange *range)
|
72
64
|
{
|
73
65
|
free(range->lower_term);
|
74
66
|
free(range->upper_term);
|
75
67
|
free(range);
|
76
68
|
}
|
77
69
|
|
78
|
-
static unsigned long long range_hash(
|
70
|
+
static unsigned long long range_hash(FrtRange *filt)
|
79
71
|
{
|
80
72
|
return filt->include_lower | (filt->include_upper << 1)
|
81
73
|
| ((frt_str_hash(rb_id2name(filt->field))
|
@@ -83,7 +75,7 @@ static unsigned long long range_hash(Range *filt)
|
|
83
75
|
^ (filt->upper_term ? frt_str_hash(filt->upper_term) : 0)) << 2);
|
84
76
|
}
|
85
77
|
|
86
|
-
static int range_eq(
|
78
|
+
static int range_eq(FrtRange *filt, FrtRange *o)
|
87
79
|
{
|
88
80
|
if ((filt->lower_term && !o->lower_term) || (!filt->lower_term && o->lower_term)) { return false; }
|
89
81
|
if ((filt->upper_term && !o->upper_term) || (!filt->upper_term && o->upper_term)) { return false; }
|
@@ -94,11 +86,8 @@ static int range_eq(Range *filt, Range *o)
|
|
94
86
|
&& (filt->include_upper == o->include_upper));
|
95
87
|
}
|
96
88
|
|
97
|
-
static
|
98
|
-
|
99
|
-
bool include_upper)
|
100
|
-
{
|
101
|
-
Range *range;
|
89
|
+
static FrtRange *range_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
90
|
+
FrtRange *range;
|
102
91
|
|
103
92
|
if (!lower_term && !upper_term) {
|
104
93
|
FRT_RAISE(FRT_ARG_ERROR, "Nil bounds for range. A range must include either "
|
@@ -119,7 +108,7 @@ static Range *range_new(FrtSymbol field, const char *lower_term,
|
|
119
108
|
"\"%s\" < \"%s\"", upper_term, lower_term);
|
120
109
|
}
|
121
110
|
|
122
|
-
range = FRT_ALLOC(
|
111
|
+
range = FRT_ALLOC(FrtRange);
|
123
112
|
|
124
113
|
range->field = field;
|
125
114
|
range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
|
@@ -129,11 +118,8 @@ static Range *range_new(FrtSymbol field, const char *lower_term,
|
|
129
118
|
return range;
|
130
119
|
}
|
131
120
|
|
132
|
-
static
|
133
|
-
|
134
|
-
bool include_upper)
|
135
|
-
{
|
136
|
-
Range *range;
|
121
|
+
static FrtRange *trange_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
122
|
+
FrtRange *range;
|
137
123
|
int len;
|
138
124
|
double upper_num, lower_num;
|
139
125
|
|
@@ -173,7 +159,7 @@ static Range *trange_new(FrtSymbol field, const char *lower_term,
|
|
173
159
|
}
|
174
160
|
}
|
175
161
|
|
176
|
-
range = FRT_ALLOC(
|
162
|
+
range = FRT_ALLOC(FrtRange);
|
177
163
|
|
178
164
|
range->field = field;
|
179
165
|
range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
|
@@ -185,27 +171,20 @@ static Range *trange_new(FrtSymbol field, const char *lower_term,
|
|
185
171
|
|
186
172
|
/***************************************************************************
|
187
173
|
*
|
188
|
-
*
|
174
|
+
* FrtRangeFilter
|
189
175
|
*
|
190
176
|
***************************************************************************/
|
191
177
|
|
192
|
-
|
193
|
-
{
|
194
|
-
FrtFilter super;
|
195
|
-
Range *range;
|
196
|
-
} RangeFilter;
|
178
|
+
#define RF(filt) ((FrtRangeFilter *)(filt))
|
197
179
|
|
198
|
-
|
199
|
-
|
200
|
-
static void frt_rfilt_destroy_i(FrtFilter *filt)
|
201
|
-
{
|
180
|
+
static void frt_rfilt_destroy_i(FrtFilter *filt) {
|
202
181
|
range_destroy(RF(filt)->range);
|
203
182
|
frt_filt_destroy_i(filt);
|
204
183
|
}
|
205
184
|
|
206
185
|
static char *frt_rfilt_to_s(FrtFilter *filt)
|
207
186
|
{
|
208
|
-
char *rstr = range_to_s(RF(filt)->range, (
|
187
|
+
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
209
188
|
char *rfstr = frt_strfmt("RangeFilter< %s >", rstr);
|
210
189
|
free(rstr);
|
211
190
|
return rfstr;
|
@@ -214,7 +193,7 @@ static char *frt_rfilt_to_s(FrtFilter *filt)
|
|
214
193
|
static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
215
194
|
{
|
216
195
|
FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
|
217
|
-
|
196
|
+
FrtRange *range = RF(filt)->range;
|
218
197
|
FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
|
219
198
|
/* if the field info exists we need to add docs to the bit vector, otherwise
|
220
199
|
* we just return an empty bit vector */
|
@@ -278,13 +257,12 @@ static int frt_rfilt_eq(FrtFilter *filt, FrtFilter *o) {
|
|
278
257
|
return range_eq(RF(filt)->range, RF(o)->range);
|
279
258
|
}
|
280
259
|
|
281
|
-
FrtFilter *
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
RF(filt)->range =
|
287
|
-
include_lower, include_upper);
|
260
|
+
FrtFilter *frt_rfilt_alloc(void) {
|
261
|
+
return filt_new(FrtRangeFilter);
|
262
|
+
}
|
263
|
+
|
264
|
+
FrtFilter *frt_rfilt_init(FrtFilter *filt, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
265
|
+
RF(filt)->range = range_new(field, lower_term, upper_term, include_lower, include_upper);
|
288
266
|
filt->get_bv_i = &frt_rfilt_get_bv_i;
|
289
267
|
filt->hash = &frt_rfilt_hash;
|
290
268
|
filt->eq = &frt_rfilt_eq;
|
@@ -293,15 +271,19 @@ FrtFilter *frt_rfilt_new(FrtSymbol field,
|
|
293
271
|
return filt;
|
294
272
|
}
|
295
273
|
|
274
|
+
FrtFilter *frt_rfilt_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
275
|
+
FrtFilter *filt = frt_rfilt_alloc();
|
276
|
+
return frt_rfilt_init(filt, field, lower_term, upper_term, include_lower, include_upper);
|
277
|
+
}
|
278
|
+
|
296
279
|
/***************************************************************************
|
297
280
|
*
|
298
|
-
*
|
281
|
+
* FrtRangeFilter
|
299
282
|
*
|
300
283
|
***************************************************************************/
|
301
284
|
|
302
|
-
static char *frt_trfilt_to_s(FrtFilter *filt)
|
303
|
-
|
304
|
-
char *rstr = range_to_s(RF(filt)->range, (FrtSymbol)NULL, 1.0);
|
285
|
+
static char *frt_trfilt_to_s(FrtFilter *filt) {
|
286
|
+
char *rstr = range_to_s(RF(filt)->range, (ID)NULL, 1.0);
|
305
287
|
char *rfstr = frt_strfmt("TypedRangeFilter< %s >", rstr);
|
306
288
|
free(rstr);
|
307
289
|
return rfstr;
|
@@ -333,10 +315,8 @@ do {\
|
|
333
315
|
}\
|
334
316
|
} while (te->next(te))
|
335
317
|
|
336
|
-
|
337
|
-
|
338
|
-
{
|
339
|
-
Range *range = RF(filt)->range;
|
318
|
+
static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
|
319
|
+
FrtRange *range = RF(filt)->range;
|
340
320
|
double lnum = 0.0, unum = 0.0;
|
341
321
|
int len = 0;
|
342
322
|
const char *lt = range->lower_term;
|
@@ -408,19 +388,17 @@ static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
|
|
408
388
|
}
|
409
389
|
|
410
390
|
return bv;
|
411
|
-
}
|
412
|
-
else {
|
391
|
+
} else {
|
413
392
|
return frt_rfilt_get_bv_i(filt, ir);
|
414
393
|
}
|
415
394
|
}
|
416
395
|
|
417
|
-
FrtFilter *
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
RF(filt)->range =
|
423
|
-
include_lower, include_upper);
|
396
|
+
FrtFilter *frt_trfilt_alloc(void) {
|
397
|
+
return filt_new(FrtRangeFilter);
|
398
|
+
}
|
399
|
+
|
400
|
+
FrtFilter *frt_trfilt_init(FrtFilter *filt, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
401
|
+
RF(filt)->range = trange_new(field, lower_term, upper_term, include_lower, include_upper);
|
424
402
|
|
425
403
|
filt->get_bv_i = &frt_trfilt_get_bv_i;
|
426
404
|
filt->hash = &frt_rfilt_hash;
|
@@ -430,6 +408,11 @@ FrtFilter *frt_trfilt_new(FrtSymbol field,
|
|
430
408
|
return filt;
|
431
409
|
}
|
432
410
|
|
411
|
+
FrtFilter *frt_trfilt_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
412
|
+
FrtFilter *filt = frt_trfilt_alloc();
|
413
|
+
return frt_trfilt_init(filt, field, lower_term, upper_term, include_lower, include_upper);
|
414
|
+
}
|
415
|
+
|
433
416
|
/*****************************************************************************
|
434
417
|
*
|
435
418
|
* RangeQuery
|
@@ -437,27 +420,18 @@ FrtFilter *frt_trfilt_new(FrtSymbol field,
|
|
437
420
|
*****************************************************************************/
|
438
421
|
|
439
422
|
#define RQ(query) ((FrtRangeQuery *)(query))
|
440
|
-
typedef struct FrtRangeQuery
|
441
|
-
{
|
442
|
-
FrtQuery f;
|
443
|
-
Range *range;
|
444
|
-
} FrtRangeQuery;
|
445
423
|
|
446
|
-
static char *frt_rq_to_s(FrtQuery *self,
|
447
|
-
{
|
424
|
+
static char *frt_rq_to_s(FrtQuery *self, ID field) {
|
448
425
|
return range_to_s(RQ(self)->range, field, self->boost);
|
449
426
|
}
|
450
427
|
|
451
|
-
static void frt_rq_destroy(FrtQuery *self)
|
452
|
-
{
|
428
|
+
static void frt_rq_destroy(FrtQuery *self) {
|
453
429
|
range_destroy(RQ(self)->range);
|
454
430
|
frt_q_destroy_i(self);
|
455
431
|
}
|
456
432
|
|
457
|
-
static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
458
|
-
|
459
|
-
{
|
460
|
-
Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
433
|
+
static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
434
|
+
FrtRange *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
461
435
|
if (tv->field == range->field) {
|
462
436
|
const int term_cnt = tv->term_cnt;
|
463
437
|
int i, j;
|
@@ -487,12 +461,10 @@ static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
|
487
461
|
return mv;
|
488
462
|
}
|
489
463
|
|
490
|
-
static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
491
|
-
{
|
464
|
+
static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
492
465
|
FrtQuery *csq;
|
493
|
-
|
494
|
-
FrtFilter *filter = frt_rfilt_new(r->field, r->lower_term, r->upper_term,
|
495
|
-
r->include_lower, r->include_upper);
|
466
|
+
FrtRange *r = RQ(self)->range;
|
467
|
+
FrtFilter *filter = frt_rfilt_new(r->field, r->lower_term, r->upper_term, r->include_lower, r->include_upper);
|
496
468
|
(void)ir;
|
497
469
|
csq = frt_csq_new_nr(filter);
|
498
470
|
((FrtConstantScoreQuery *)csq)->original = self;
|
@@ -500,8 +472,7 @@ static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
500
472
|
return (FrtQuery *)csq;
|
501
473
|
}
|
502
474
|
|
503
|
-
static unsigned long long frt_rq_hash(FrtQuery *self)
|
504
|
-
{
|
475
|
+
static unsigned long long frt_rq_hash(FrtQuery *self) {
|
505
476
|
return range_hash(RQ(self)->range);
|
506
477
|
}
|
507
478
|
|
@@ -509,33 +480,36 @@ static int frt_rq_eq(FrtQuery *self, FrtQuery *o) {
|
|
509
480
|
return range_eq(RQ(self)->range, RQ(o)->range);
|
510
481
|
}
|
511
482
|
|
512
|
-
FrtQuery *frt_rq_new_less(
|
483
|
+
FrtQuery *frt_rq_new_less(ID field, const char *upper_term, bool include_upper) {
|
513
484
|
return frt_rq_new(field, NULL, upper_term, false, include_upper);
|
514
485
|
}
|
515
486
|
|
516
|
-
FrtQuery *frt_rq_new_more(
|
487
|
+
FrtQuery *frt_rq_new_more(ID field, const char *lower_term, bool include_lower) {
|
517
488
|
return frt_rq_new(field, lower_term, NULL, include_lower, false);
|
518
489
|
}
|
519
490
|
|
520
|
-
FrtQuery *
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
self
|
527
|
-
|
528
|
-
|
529
|
-
self->
|
530
|
-
self->
|
531
|
-
self->
|
532
|
-
self->
|
533
|
-
self->
|
534
|
-
self->destroy_i = &frt_rq_destroy;
|
535
|
-
self->create_weight_i = &frt_q_create_weight_unsup;
|
491
|
+
FrtQuery *frt_rq_alloc(void) {
|
492
|
+
return frt_q_new(FrtRangeQuery);
|
493
|
+
}
|
494
|
+
|
495
|
+
FrtQuery *frt_rq_init(FrtQuery *self, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
496
|
+
FrtRange *range = range_new(field, lower_term, upper_term, include_lower, include_upper);
|
497
|
+
RQ(self)->range = range;
|
498
|
+
self->type = RANGE_QUERY;
|
499
|
+
self->rewrite = &frt_rq_rewrite;
|
500
|
+
self->to_s = &frt_rq_to_s;
|
501
|
+
self->hash = &frt_rq_hash;
|
502
|
+
self->eq = &frt_rq_eq;
|
503
|
+
self->destroy_i = &frt_rq_destroy;
|
504
|
+
self->create_weight_i = &frt_q_create_weight_unsup;
|
536
505
|
return self;
|
537
506
|
}
|
538
507
|
|
508
|
+
FrtQuery *frt_rq_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
509
|
+
FrtQuery *self = frt_rq_alloc();
|
510
|
+
return frt_rq_init(self, field, lower_term, upper_term, include_lower, include_upper);
|
511
|
+
}
|
512
|
+
|
539
513
|
/*****************************************************************************
|
540
514
|
*
|
541
515
|
* TypedRangeQuery
|
@@ -559,10 +533,8 @@ for (i = tv->term_cnt - 1; i >= 0; i--) {\
|
|
559
533
|
}\
|
560
534
|
}\
|
561
535
|
|
562
|
-
static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
563
|
-
|
564
|
-
{
|
565
|
-
Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
536
|
+
static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
537
|
+
FrtRange *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
|
566
538
|
if (tv->field == range->field) {
|
567
539
|
double lnum = 0.0, unum = 0.0;
|
568
540
|
int len = 0;
|
@@ -615,21 +587,17 @@ static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
|
|
615
587
|
/* should never happen. Error should have been rb_raised */
|
616
588
|
assert(false);
|
617
589
|
}
|
618
|
-
|
619
|
-
}
|
620
|
-
else {
|
590
|
+
} else {
|
621
591
|
return rq_get_matchv_i(self, mv, tv);
|
622
592
|
}
|
623
593
|
}
|
624
594
|
return mv;
|
625
595
|
}
|
626
596
|
|
627
|
-
static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
628
|
-
{
|
597
|
+
static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir) {
|
629
598
|
FrtQuery *csq;
|
630
|
-
|
631
|
-
FrtFilter *filter = frt_trfilt_new(r->field, r->lower_term, r->upper_term,
|
632
|
-
r->include_lower, r->include_upper);
|
599
|
+
FrtRange *r = RQ(self)->range;
|
600
|
+
FrtFilter *filter = frt_trfilt_new(r->field, r->lower_term, r->upper_term, r->include_lower, r->include_upper);
|
633
601
|
(void)ir;
|
634
602
|
csq = frt_csq_new_nr(filter);
|
635
603
|
((FrtConstantScoreQuery *)csq)->original = self;
|
@@ -637,29 +605,32 @@ static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
637
605
|
return (FrtQuery *)csq;
|
638
606
|
}
|
639
607
|
|
640
|
-
FrtQuery *frt_trq_new_less(
|
608
|
+
FrtQuery *frt_trq_new_less(ID field, const char *upper_term, bool include_upper) {
|
641
609
|
return frt_trq_new(field, NULL, upper_term, false, include_upper);
|
642
610
|
}
|
643
611
|
|
644
|
-
FrtQuery *frt_trq_new_more(
|
612
|
+
FrtQuery *frt_trq_new_more(ID field, const char *lower_term, bool include_lower) {
|
645
613
|
return frt_trq_new(field, lower_term, NULL, include_lower, false);
|
646
614
|
}
|
647
615
|
|
648
|
-
FrtQuery *
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
self
|
655
|
-
|
656
|
-
|
657
|
-
self->
|
658
|
-
self->
|
659
|
-
self->
|
660
|
-
self->
|
661
|
-
self->
|
662
|
-
self->destroy_i = &frt_rq_destroy;
|
663
|
-
self->create_weight_i = &frt_q_create_weight_unsup;
|
616
|
+
FrtQuery *frt_trq_alloc(void) {
|
617
|
+
return frt_q_new(FrtRangeQuery);
|
618
|
+
}
|
619
|
+
|
620
|
+
FrtQuery *frt_trq_init(FrtQuery *self, ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
621
|
+
FrtRange *range = trange_new(field, lower_term, upper_term, include_lower, include_upper);
|
622
|
+
RQ(self)->range = range;
|
623
|
+
self->type = TYPED_RANGE_QUERY;
|
624
|
+
self->rewrite = &frt_trq_rewrite;
|
625
|
+
self->to_s = &frt_rq_to_s;
|
626
|
+
self->hash = &frt_rq_hash;
|
627
|
+
self->eq = &frt_rq_eq;
|
628
|
+
self->destroy_i = &frt_rq_destroy;
|
629
|
+
self->create_weight_i = &frt_q_create_weight_unsup;
|
664
630
|
return self;
|
665
631
|
}
|
632
|
+
|
633
|
+
FrtQuery *frt_trq_new(ID field, const char *lower_term, const char *upper_term, bool include_lower, bool include_upper) {
|
634
|
+
FrtQuery *self = frt_trq_alloc();
|
635
|
+
return frt_trq_init(self, field, lower_term, upper_term, include_lower, include_upper);
|
636
|
+
}
|