ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_prefix.c
CHANGED
@@ -7,83 +7,94 @@
|
|
7
7
|
*
|
8
8
|
****************************************************************************/
|
9
9
|
|
10
|
-
|
10
|
+
#define PfxQ(query) ((PrefixQuery *)(query))
|
11
|
+
|
12
|
+
static char *prq_to_s(Query *self, const char *current_field)
|
11
13
|
{
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
14
|
+
char *buffer, *bptr;
|
15
|
+
const char *prefix = PfxQ(self)->prefix;
|
16
|
+
const char *field = PfxQ(self)->field;
|
17
|
+
size_t plen = strlen(prefix);
|
18
|
+
size_t flen = strlen(field);
|
19
|
+
|
20
|
+
bptr = buffer = ALLOC_N(char, plen + flen + 35);
|
21
|
+
|
22
|
+
if (strcmp(field, current_field) != 0) {
|
23
|
+
sprintf(bptr, "%s:", field);
|
24
|
+
bptr += flen + 1;
|
25
|
+
}
|
26
|
+
|
27
|
+
sprintf(bptr, "%s*", prefix);
|
28
|
+
bptr += plen + 1;
|
29
|
+
if (self->boost != 1.0) {
|
30
|
+
*bptr = '^';
|
31
|
+
dbl_to_s(++bptr, self->boost);
|
32
|
+
}
|
33
|
+
|
34
|
+
return buffer;
|
29
35
|
}
|
30
36
|
|
31
|
-
Query *prq_rewrite(Query *self, IndexReader *ir)
|
37
|
+
static Query *prq_rewrite(Query *self, IndexReader *ir)
|
32
38
|
{
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
39
|
+
const char *field = PfxQ(self)->field;
|
40
|
+
const int field_num = fis_get_field_num(ir->fis, field);
|
41
|
+
Query *volatile q = multi_tq_new_conf(field, MTQMaxTerms(self), 0.0);
|
42
|
+
q->boost = self->boost; /* set the boost */
|
43
|
+
|
44
|
+
if (field_num >= 0) {
|
45
|
+
const char *prefix = PfxQ(self)->prefix;
|
46
|
+
TermEnum *te = ir->terms_from(ir, field_num, prefix);
|
47
|
+
const char *term = te->curr_term;
|
48
|
+
size_t prefix_len = strlen(prefix);
|
49
|
+
|
50
|
+
TRY
|
51
|
+
do {
|
52
|
+
if (strncmp(term, prefix, prefix_len) != 0) {
|
53
|
+
break;
|
54
|
+
}
|
55
|
+
multi_tq_add_term(q, term); /* found a match */
|
56
|
+
} while (te->next(te));
|
57
|
+
XFINALLY
|
58
|
+
te->close(te);
|
59
|
+
XENDTRY
|
60
|
+
}
|
61
|
+
|
62
|
+
return q;
|
57
63
|
}
|
58
64
|
|
59
65
|
static void prq_destroy(Query *self)
|
60
66
|
{
|
61
|
-
|
62
|
-
|
67
|
+
free(PfxQ(self)->field);
|
68
|
+
free(PfxQ(self)->prefix);
|
69
|
+
q_destroy_i(self);
|
63
70
|
}
|
64
71
|
|
65
|
-
static
|
72
|
+
static ulong prq_hash(Query *self)
|
66
73
|
{
|
67
|
-
|
74
|
+
return str_hash(PfxQ(self)->field) ^ str_hash(PfxQ(self)->prefix);
|
68
75
|
}
|
69
76
|
|
70
77
|
static int prq_eq(Query *self, Query *o)
|
71
78
|
{
|
72
|
-
|
79
|
+
return (strcmp(PfxQ(self)->prefix, PfxQ(o)->prefix) == 0)
|
80
|
+
&& (strcmp(PfxQ(self)->field, PfxQ(o)->field) == 0);
|
73
81
|
}
|
74
82
|
|
75
|
-
Query *
|
83
|
+
Query *prefixq_new(const char *field, const char *prefix)
|
76
84
|
{
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
85
|
+
Query *self = q_new(PrefixQuery);
|
86
|
+
|
87
|
+
PfxQ(self)->field = estrdup(field);
|
88
|
+
PfxQ(self)->prefix = estrdup(prefix);
|
89
|
+
MTQMaxTerms(self) = PREFIX_QUERY_MAX_TERMS;
|
90
|
+
|
91
|
+
self->type = PREFIX_QUERY;
|
92
|
+
self->rewrite = &prq_rewrite;
|
93
|
+
self->to_s = &prq_to_s;
|
94
|
+
self->hash = &prq_hash;
|
95
|
+
self->eq = &prq_eq;
|
96
|
+
self->destroy_i = &prq_destroy;
|
97
|
+
self->create_weight_i = &q_create_weight_unsup;
|
98
|
+
|
99
|
+
return self;
|
89
100
|
}
|
data/ext/q_range.c
CHANGED
@@ -1,120 +1,134 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
-
static char * const NIL_BOUNDS_ERROR_MSG = "At least one value must be non-nil";
|
5
|
-
static char * const LOWER_BOUND_ERROR_MSG = "The lower bound must be non-nil to be inclusive";
|
6
|
-
static char * const UPPER_BOUND_ERROR_MSG = "The upper bound must be non-nil to be inclusive";
|
7
|
-
static char * const BOUND_ORDER_ERROR_MSG = "The lower bound must less than the upper bound";
|
8
|
-
|
9
4
|
/*****************************************************************************
|
10
5
|
*
|
11
6
|
* Range
|
12
7
|
*
|
13
8
|
*****************************************************************************/
|
14
9
|
|
15
|
-
|
10
|
+
typedef struct Range
|
16
11
|
{
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
12
|
+
char *field;
|
13
|
+
char *lower_term;
|
14
|
+
char *upper_term;
|
15
|
+
bool include_lower : 1;
|
16
|
+
bool include_upper : 1;
|
17
|
+
} Range;
|
18
|
+
|
19
|
+
static char *range_to_s(Range *range, const char *field, float boost)
|
20
|
+
{
|
21
|
+
char *buffer, *b;
|
22
|
+
size_t flen, llen, ulen;
|
23
|
+
|
24
|
+
flen = strlen(range->field);
|
25
|
+
llen = range->lower_term ? strlen(range->lower_term) : 0;
|
26
|
+
ulen = range->upper_term ? strlen(range->upper_term) : 0;
|
27
|
+
buffer = ALLOC_N(char, flen + llen + ulen + 40);
|
28
|
+
b = buffer;
|
29
|
+
|
30
|
+
if (strcmp(field, range->field)) {
|
31
|
+
memcpy(buffer, range->field, flen * sizeof(char));
|
32
|
+
b += flen;
|
33
|
+
*b = ':';
|
34
|
+
b++;
|
35
|
+
}
|
36
|
+
|
37
|
+
if (range->lower_term) {
|
38
|
+
*b = range->include_lower ? '[' : '{';
|
39
|
+
b++;
|
40
|
+
memcpy(b, range->lower_term, llen);
|
41
|
+
b += llen;
|
42
|
+
} else {
|
43
|
+
*b = '<';
|
44
|
+
b++;
|
45
|
+
}
|
46
|
+
|
47
|
+
if (range->upper_term && range->lower_term) {
|
48
|
+
*b = ' '; b++;
|
49
|
+
}
|
50
|
+
|
51
|
+
if (range->upper_term) {
|
52
|
+
memcpy(b, range->upper_term, ulen);
|
53
|
+
b += ulen;
|
54
|
+
*b = range->include_upper ? ']' : '}';
|
55
|
+
b++;
|
56
|
+
} else {
|
57
|
+
*b = '>';
|
58
|
+
b++;
|
59
|
+
}
|
60
|
+
|
61
|
+
*b = 0;
|
62
|
+
if (boost != 1.0) {
|
63
|
+
*b = '^';
|
64
|
+
dbl_to_s(b + 1, boost);
|
65
|
+
}
|
66
|
+
return buffer;
|
63
67
|
}
|
64
68
|
|
65
|
-
void range_destroy(
|
69
|
+
static void range_destroy(Range *range)
|
66
70
|
{
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
free(range);
|
71
|
+
free(range->field);
|
72
|
+
free(range->lower_term);
|
73
|
+
free(range->upper_term);
|
74
|
+
free(range);
|
72
75
|
}
|
73
76
|
|
74
|
-
static
|
77
|
+
static ulong range_hash(Range *filt)
|
75
78
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
79
|
+
return filt->include_lower | (filt->include_upper << 1)
|
80
|
+
| ((str_hash(filt->field)
|
81
|
+
^ (filt->lower_term ? str_hash(filt->lower_term) : 0)
|
82
|
+
^ (filt->upper_term ? str_hash(filt->upper_term) : 0)) << 2);
|
80
83
|
}
|
81
84
|
|
82
|
-
static
|
85
|
+
static int str_eq(char *s1, char *s2)
|
83
86
|
{
|
84
|
-
|
87
|
+
return (s1 && s2 && (strcmp(s1, s2) == 0)) || (s1 == s2);
|
85
88
|
}
|
86
89
|
|
87
|
-
static
|
90
|
+
static int range_eq(Range *filt, Range *o)
|
88
91
|
{
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
92
|
+
return (str_eq(filt->field, o->field)
|
93
|
+
&& str_eq(filt->lower_term, o->lower_term)
|
94
|
+
&& str_eq(filt->upper_term, o->upper_term)
|
95
|
+
&& (filt->include_lower == o->include_lower)
|
96
|
+
&& (filt->include_upper == o->include_upper));
|
94
97
|
}
|
95
98
|
|
96
|
-
Range *
|
97
|
-
|
99
|
+
Range *range_new(const char *field, const char *lower_term,
|
100
|
+
const char *upper_term, bool include_lower,
|
101
|
+
bool include_upper)
|
98
102
|
{
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
103
|
+
Range *range;
|
104
|
+
|
105
|
+
if (!lower_term && !upper_term) {
|
106
|
+
RAISE(ARG_ERROR, "Nil bounds for range. A range must include either "
|
107
|
+
"lower bound or an upper bound");
|
108
|
+
}
|
109
|
+
if (include_lower && !lower_term) {
|
110
|
+
RAISE(ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
|
111
|
+
"is, if you specify :include_lower => true when you create a "
|
112
|
+
"range you must include a :lower_term");
|
113
|
+
}
|
114
|
+
if (include_upper && !upper_term) {
|
115
|
+
RAISE(ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
|
116
|
+
"is, if you specify :include_upper => true when you create a "
|
117
|
+
"range you must include a :upper_term");
|
118
|
+
}
|
119
|
+
if (upper_term && lower_term && (strcmp(upper_term, lower_term) < 0)) {
|
120
|
+
RAISE(ARG_ERROR, "Upper bound must be greater than lower bound. "
|
121
|
+
"\"%s\" < \"%s\"", upper_term, lower_term);
|
122
|
+
}
|
123
|
+
|
124
|
+
range = ALLOC(Range);
|
125
|
+
|
126
|
+
range->field = estrdup((char *)field);
|
127
|
+
range->lower_term = lower_term ? estrdup(lower_term) : NULL;
|
128
|
+
range->upper_term = upper_term ? estrdup(upper_term) : NULL;
|
129
|
+
range->include_lower = include_lower;
|
130
|
+
range->include_upper = include_upper;
|
131
|
+
return range;
|
118
132
|
}
|
119
133
|
|
120
134
|
/***************************************************************************
|
@@ -123,109 +137,112 @@ Range *range_create(const char *field, char *lower_term, char *upper_term,
|
|
123
137
|
*
|
124
138
|
***************************************************************************/
|
125
139
|
|
126
|
-
|
140
|
+
typedef struct RangeFilter
|
141
|
+
{
|
142
|
+
Filter super;
|
143
|
+
Range *range;
|
144
|
+
} RangeFilter;
|
145
|
+
|
146
|
+
#define RF(filt) ((RangeFilter *)(filt))
|
147
|
+
|
148
|
+
static void rfilt_destroy_i(Filter *filt)
|
127
149
|
{
|
128
|
-
|
129
|
-
|
150
|
+
range_destroy(RF(filt)->range);
|
151
|
+
filt_destroy_i(filt);
|
130
152
|
}
|
131
153
|
|
132
|
-
char *rfilt_to_s(Filter *
|
154
|
+
static char *rfilt_to_s(Filter *filt)
|
133
155
|
{
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
return rfstr;
|
156
|
+
char *rstr = range_to_s(RF(filt)->range, "", 1.0);
|
157
|
+
char *rfstr = strfmt("RangeFilter< %s >", rstr);
|
158
|
+
free(rstr);
|
159
|
+
return rfstr;
|
139
160
|
}
|
140
161
|
|
141
|
-
BitVector *
|
162
|
+
static BitVector *rfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
142
163
|
{
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
if (!range->include_lower) // make adjustments to set to exclusive
|
164
|
-
check_lower = true;
|
165
|
-
|
166
|
-
tde = ir->term_docs(ir);
|
167
|
-
tb = te->tb_curr;
|
168
|
-
term.text = tb->text;
|
169
|
-
do {
|
170
|
-
if (tb && strcmp(tb->field, field) == 0) {
|
171
|
-
if (!check_lower || lower_term == EMPTY_STRING ||
|
172
|
-
strcmp(tb->text, lower_term) > 0) {
|
173
|
-
check_lower = false;
|
174
|
-
if (upper_term) {
|
175
|
-
int compare = strcmp(upper_term, tb->text);
|
176
|
-
/* if beyond the upper term, or is exclusive and
|
177
|
-
* this is equal to the upper term, break out */
|
178
|
-
if ((compare < 0) ||
|
179
|
-
(!include_upper && compare==0)) {
|
180
|
-
break;
|
181
|
-
}
|
182
|
-
}
|
183
|
-
/* we have a good term, find the docs */
|
184
|
-
/* text is already pointing to term buffer text */
|
185
|
-
term.field = tb->field;
|
186
|
-
tde->seek(tde, &term);
|
187
|
-
while (tde->next(tde)) {
|
188
|
-
bv_set(bv, tde->doc_num(tde));
|
189
|
-
//printf("Setting %d\n", tde->doc_num(tde));
|
164
|
+
BitVector *bv = bv_new_capa(ir->max_doc(ir));
|
165
|
+
Range *range = RF(filt)->range;
|
166
|
+
FieldInfo *fi = fis_get_field(ir->fis, range->field);
|
167
|
+
/* if the field info exists we need to add docs to the bit vector, otherwise
|
168
|
+
* we just return an empty bit vector */
|
169
|
+
if (fi) {
|
170
|
+
const char *lower_term =
|
171
|
+
range->lower_term ? range->lower_term : EMPTY_STRING;
|
172
|
+
const char *upper_term = range->upper_term;
|
173
|
+
const bool include_upper = range->include_upper;
|
174
|
+
const int field_num = fi->number;
|
175
|
+
char *term;
|
176
|
+
TermEnum* te;
|
177
|
+
TermDocEnum *tde;
|
178
|
+
bool check_lower;
|
179
|
+
|
180
|
+
te = ir->terms(ir, field_num);
|
181
|
+
if (te->skip_to(te, lower_term) == NULL) {
|
182
|
+
te->close(te);
|
183
|
+
return bv;
|
190
184
|
}
|
191
|
-
|
192
|
-
|
193
|
-
|
185
|
+
|
186
|
+
check_lower = !(range->include_lower || (lower_term == EMPTY_STRING));
|
187
|
+
|
188
|
+
tde = ir->term_docs(ir);
|
189
|
+
term = te->curr_term;
|
190
|
+
do {
|
191
|
+
if (!check_lower
|
192
|
+
|| (strcmp(term, lower_term) > 0)) {
|
193
|
+
check_lower = false;
|
194
|
+
if (upper_term) {
|
195
|
+
int compare = strcmp(upper_term, term);
|
196
|
+
/* Break if the current term is greater than or equal to the upper
|
197
|
+
* term and include_upper is false, or the term is strictly
|
198
|
+
* greater than upper term. This is optimized so that only
|
199
|
+
* one check is done except in the last check or two */
|
200
|
+
if ((compare <= 0)
|
201
|
+
&& (!include_upper || (compare < 0))) {
|
202
|
+
break;
|
203
|
+
}
|
204
|
+
}
|
205
|
+
/* we have a good term, find the docs */
|
206
|
+
/* text is already pointing to term buffer text */
|
207
|
+
tde->seek_te(tde, te);
|
208
|
+
while (tde->next(tde)) {
|
209
|
+
bv_set(bv, tde->doc_num(tde));
|
210
|
+
/* printf("Setting %d\n", tde->doc_num(tde)); */
|
211
|
+
}
|
212
|
+
}
|
213
|
+
} while (te->next(te));
|
214
|
+
|
215
|
+
tde->close(tde);
|
216
|
+
te->close(te);
|
194
217
|
}
|
195
|
-
} while (te->next(te));
|
196
|
-
|
197
|
-
tde->close(tde);
|
198
|
-
te->close(te);
|
199
|
-
term_destroy(term_from);
|
200
218
|
|
201
|
-
|
219
|
+
return bv;
|
202
220
|
}
|
203
221
|
|
204
|
-
|
222
|
+
static ulong rfilt_hash(Filter *filt)
|
205
223
|
{
|
206
|
-
|
224
|
+
return range_hash(RF(filt)->range);
|
207
225
|
}
|
208
226
|
|
209
|
-
int rfilt_eq(Filter *
|
227
|
+
static int rfilt_eq(Filter *filt, Filter *o)
|
210
228
|
{
|
211
|
-
|
229
|
+
return range_eq(RF(filt)->range, RF(o)->range);
|
212
230
|
}
|
213
231
|
|
214
|
-
Filter *
|
215
|
-
|
232
|
+
Filter *rfilt_new(const char *field,
|
233
|
+
const char *lower_term, const char *upper_term,
|
234
|
+
bool include_lower, bool include_upper)
|
216
235
|
{
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
self->destroy = &rfilt_destroy;
|
228
|
-
return self;
|
236
|
+
Filter *filt = filt_new(RangeFilter);
|
237
|
+
RF(filt)->range = range_new(field, lower_term, upper_term,
|
238
|
+
include_lower, include_upper);
|
239
|
+
|
240
|
+
filt->get_bv_i = &rfilt_get_bv_i;
|
241
|
+
filt->hash = &rfilt_hash;
|
242
|
+
filt->eq = &rfilt_eq;
|
243
|
+
filt->to_s = &rfilt_to_s;
|
244
|
+
filt->destroy_i = &rfilt_destroy_i;
|
245
|
+
return filt;
|
229
246
|
}
|
230
247
|
|
231
248
|
/*****************************************************************************
|
@@ -234,61 +251,69 @@ Filter *rfilt_create(const char *field, char *lower_term, char *upper_term,
|
|
234
251
|
*
|
235
252
|
*****************************************************************************/
|
236
253
|
|
237
|
-
|
254
|
+
#define RQ(query) ((RangeQuery *)(query))
|
255
|
+
typedef struct RangeQuery
|
256
|
+
{
|
257
|
+
Query f;
|
258
|
+
Range *range;
|
259
|
+
} RangeQuery;
|
260
|
+
|
261
|
+
static char *rq_to_s(Query *self, const char *field)
|
238
262
|
{
|
239
|
-
|
240
|
-
return range_to_s(range, field, self->boost);
|
263
|
+
return range_to_s(RQ(self)->range, field, self->boost);
|
241
264
|
}
|
242
265
|
|
243
|
-
void rq_destroy(Query *self)
|
266
|
+
static void rq_destroy(Query *self)
|
244
267
|
{
|
245
|
-
|
246
|
-
|
268
|
+
range_destroy(RQ(self)->range);
|
269
|
+
q_destroy_i(self);
|
247
270
|
}
|
248
271
|
|
249
|
-
Query *rq_rewrite(Query *self, IndexReader *ir)
|
272
|
+
static Query *rq_rewrite(Query *self, IndexReader *ir)
|
250
273
|
{
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
274
|
+
Range *r = RQ(self)->range;
|
275
|
+
Filter *filter = rfilt_new(r->field, r->lower_term, r->upper_term,
|
276
|
+
r->include_lower, r->include_upper);
|
277
|
+
(void)ir;
|
278
|
+
return csq_new_nr(filter);
|
255
279
|
}
|
256
280
|
|
257
|
-
static
|
281
|
+
static ulong rq_hash(Query *self)
|
258
282
|
{
|
259
|
-
|
283
|
+
return range_hash(RQ(self)->range);
|
260
284
|
}
|
261
285
|
|
262
286
|
static int rq_eq(Query *self, Query *o)
|
263
287
|
{
|
264
|
-
|
288
|
+
return range_eq(RQ(self)->range, RQ(o)->range);
|
265
289
|
}
|
266
290
|
|
267
|
-
Query *
|
291
|
+
Query *rq_new_less(const char *field, const char *upper_term,
|
292
|
+
bool include_upper)
|
268
293
|
{
|
269
|
-
|
294
|
+
return rq_new(field, NULL, upper_term, false, include_upper);
|
270
295
|
}
|
271
296
|
|
272
|
-
Query *
|
297
|
+
Query *rq_new_more(const char *field, const char *lower_term,
|
298
|
+
bool include_lower)
|
273
299
|
{
|
274
|
-
|
300
|
+
return rq_new(field, lower_term, NULL, include_lower, false);
|
275
301
|
}
|
276
302
|
|
277
|
-
Query *
|
278
|
-
|
303
|
+
Query *rq_new(const char *field, const char *lower_term,
|
304
|
+
const char *upper_term, bool include_lower, bool include_upper)
|
279
305
|
{
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
return self;
|
306
|
+
Query *self = q_new(RangeQuery);
|
307
|
+
|
308
|
+
RQ(self)->range = range_new(field, lower_term, upper_term,
|
309
|
+
include_lower, include_upper);
|
310
|
+
|
311
|
+
self->type = RANGE_QUERY;
|
312
|
+
self->rewrite = &rq_rewrite;
|
313
|
+
self->to_s = &rq_to_s;
|
314
|
+
self->hash = &rq_hash;
|
315
|
+
self->eq = &rq_eq;
|
316
|
+
self->destroy_i = &rq_destroy;
|
317
|
+
self->create_weight_i = &q_create_weight_unsup;
|
318
|
+
return self;
|
294
319
|
}
|