ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/search.h
CHANGED
@@ -9,9 +9,6 @@ typedef struct Scorer Scorer;
|
|
9
9
|
#include "bitvector.h"
|
10
10
|
#include "similarity.h"
|
11
11
|
|
12
|
-
#define term_set_create() \
|
13
|
-
hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
|
14
|
-
|
15
12
|
/***************************************************************************
|
16
13
|
*
|
17
14
|
* Explanation
|
@@ -24,15 +21,45 @@ typedef struct Explanation
|
|
24
21
|
float value;
|
25
22
|
char *description;
|
26
23
|
struct Explanation **details;
|
27
|
-
int dcnt;
|
28
|
-
int dcapa;
|
29
24
|
} Explanation;
|
25
|
+
|
26
|
+
extern Explanation *expl_new(float value, const char *description, ...);
|
27
|
+
extern void expl_destroy(Explanation *expl);
|
28
|
+
extern Explanation *expl_add_detail(Explanation *expl, Explanation *detail);
|
29
|
+
extern char *expl_to_s_depth(Explanation *expl, int depth);
|
30
|
+
extern char *expl_to_html(Explanation *expl);
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
32
|
+
#define expl_to_s(expl) expl_to_s_depth(expl, 0)
|
33
|
+
|
34
|
+
/***************************************************************************
|
35
|
+
*
|
36
|
+
* Highlighter
|
37
|
+
*
|
38
|
+
***************************************************************************/
|
39
|
+
|
40
|
+
typedef struct MatchRange
|
41
|
+
{
|
42
|
+
int start;
|
43
|
+
int end;
|
44
|
+
int start_offset;
|
45
|
+
int end_offset;
|
46
|
+
double score;
|
47
|
+
} MatchRange;
|
48
|
+
|
49
|
+
#define MATCH_VECTOR_INIT_CAPA 8
|
50
|
+
typedef struct MatchVector
|
51
|
+
{
|
52
|
+
int size;
|
53
|
+
int capa;
|
54
|
+
MatchRange *matches;
|
55
|
+
} MatchVector;
|
56
|
+
|
57
|
+
extern MatchVector *matchv_new();
|
58
|
+
extern MatchVector *matchv_add(MatchVector *mp, int start, int end);
|
59
|
+
extern MatchVector *matchv_sort(MatchVector *self);
|
60
|
+
extern void matchv_destroy(MatchVector *self);
|
61
|
+
extern MatchVector *matchv_compact(MatchVector *self);
|
62
|
+
extern MatchVector *matchv_compact_with_breaks(MatchVector *self);
|
36
63
|
|
37
64
|
/***************************************************************************
|
38
65
|
*
|
@@ -46,8 +73,6 @@ typedef struct Hit
|
|
46
73
|
float score;
|
47
74
|
} Hit;
|
48
75
|
|
49
|
-
extern bool hit_less_than(void *p1, void *p2);
|
50
|
-
|
51
76
|
/***************************************************************************
|
52
77
|
*
|
53
78
|
* TopDocs
|
@@ -59,9 +84,10 @@ typedef struct TopDocs
|
|
59
84
|
int total_hits;
|
60
85
|
int size;
|
61
86
|
Hit **hits;
|
87
|
+
float max_score;
|
62
88
|
} TopDocs;
|
63
89
|
|
64
|
-
extern TopDocs *
|
90
|
+
extern TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score);
|
65
91
|
extern void td_destroy(TopDocs *td);
|
66
92
|
extern char *td_to_s(TopDocs *td);
|
67
93
|
|
@@ -73,22 +99,23 @@ extern char *td_to_s(TopDocs *td);
|
|
73
99
|
|
74
100
|
typedef struct Filter
|
75
101
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
102
|
+
char *name;
|
103
|
+
HashTable *cache;
|
104
|
+
BitVector *(*get_bv_i)(struct Filter *self, IndexReader *ir);
|
105
|
+
char *(*to_s)(struct Filter *self);
|
106
|
+
ulong (*hash)(struct Filter *self);
|
107
|
+
int (*eq)(struct Filter *self, struct Filter *o);
|
108
|
+
void (*destroy_i)(struct Filter *self);
|
109
|
+
int ref_cnt;
|
84
110
|
} Filter;
|
85
111
|
|
86
|
-
|
87
|
-
extern
|
88
|
-
extern BitVector *filt_get_bv(Filter *
|
89
|
-
extern void
|
90
|
-
extern
|
91
|
-
extern
|
112
|
+
#define filt_new(type) filt_create(sizeof(type), #type)
|
113
|
+
extern Filter *filt_create(size_t size, const char *name);
|
114
|
+
extern BitVector *filt_get_bv(Filter *filt, IndexReader *ir);
|
115
|
+
extern void filt_destroy_i(Filter *filt);
|
116
|
+
extern void filt_deref(Filter *filt);
|
117
|
+
extern ulong filt_hash(Filter *filt);
|
118
|
+
extern int filt_eq(Filter *filt, Filter *o);
|
92
119
|
|
93
120
|
/***************************************************************************
|
94
121
|
*
|
@@ -96,8 +123,9 @@ extern int filt_eq(Filter *self, Filter *o);
|
|
96
123
|
*
|
97
124
|
***************************************************************************/
|
98
125
|
|
99
|
-
extern Filter *
|
100
|
-
|
126
|
+
extern Filter *rfilt_new(const char *field,
|
127
|
+
const char *lower_term, const char *upper_term,
|
128
|
+
bool include_lower, bool include_upper);
|
101
129
|
|
102
130
|
/***************************************************************************
|
103
131
|
*
|
@@ -105,12 +133,8 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
|
|
105
133
|
*
|
106
134
|
***************************************************************************/
|
107
135
|
|
108
|
-
|
109
|
-
|
110
|
-
Query *query;
|
111
|
-
} QueryFilter;
|
112
|
-
|
113
|
-
extern Filter *qfilt_create(Query *query);
|
136
|
+
extern Filter *qfilt_new(Query *query);
|
137
|
+
extern Filter *qfilt_new_nr(Query *query);
|
114
138
|
|
115
139
|
/***************************************************************************
|
116
140
|
*
|
@@ -120,12 +144,11 @@ extern Filter *qfilt_create(Query *query);
|
|
120
144
|
|
121
145
|
struct Weight
|
122
146
|
{
|
123
|
-
|
124
|
-
float
|
125
|
-
float
|
126
|
-
float
|
127
|
-
|
128
|
-
Query *query;
|
147
|
+
float value;
|
148
|
+
float qweight;
|
149
|
+
float qnorm;
|
150
|
+
float idf;
|
151
|
+
Query *query;
|
129
152
|
Similarity *similarity;
|
130
153
|
Query *(*get_query)(Weight *self);
|
131
154
|
float (*get_value)(Weight *self);
|
@@ -137,78 +160,26 @@ struct Weight
|
|
137
160
|
void (*destroy)(Weight *self);
|
138
161
|
};
|
139
162
|
|
140
|
-
|
163
|
+
#define w_new(type, query) w_create(sizeof(type), query)
|
164
|
+
extern Weight *w_create(size_t size, Query *query);
|
141
165
|
extern void w_destroy(Weight *self);
|
142
|
-
|
143
166
|
extern Query *w_get_query(Weight *self);
|
144
167
|
extern float w_get_value(Weight *self);
|
145
168
|
extern float w_sum_of_squared_weights(Weight *self);
|
146
169
|
extern void w_normalize(Weight *self, float normalization_factor);
|
147
170
|
|
148
|
-
/***************************************************************************
|
149
|
-
*
|
150
|
-
* TermWeight
|
151
|
-
*
|
152
|
-
***************************************************************************/
|
153
|
-
|
154
|
-
extern Weight *tw_create(Query *query, Searcher *searcher);
|
155
|
-
|
156
|
-
/***************************************************************************
|
157
|
-
*
|
158
|
-
* BooleanWeight
|
159
|
-
*
|
160
|
-
***************************************************************************/
|
161
|
-
|
162
|
-
typedef struct BooleanWeight {
|
163
|
-
Weight **weights;
|
164
|
-
int w_cnt;
|
165
|
-
} BooleanWeight;
|
166
|
-
|
167
|
-
extern Weight *bw_create(Query *query, Searcher *searcher);
|
168
|
-
|
169
|
-
/***************************************************************************
|
170
|
-
*
|
171
|
-
* PhraseWeight
|
172
|
-
*
|
173
|
-
***************************************************************************/
|
174
|
-
|
175
|
-
extern Weight *phw_create(Query *query, Searcher *searcher);
|
176
|
-
|
177
|
-
/***************************************************************************
|
178
|
-
*
|
179
|
-
* ConstantScoreWeight
|
180
|
-
*
|
181
|
-
***************************************************************************/
|
182
|
-
|
183
|
-
extern Weight *csw_create(Query *query, Searcher *searcher);
|
184
|
-
|
185
|
-
/***************************************************************************
|
186
|
-
*
|
187
|
-
* MatchAllWeight
|
188
|
-
*
|
189
|
-
***************************************************************************/
|
190
|
-
|
191
|
-
extern Weight *maw_create(Query *query, Searcher *searcher);
|
192
|
-
|
193
|
-
/***************************************************************************
|
194
|
-
*
|
195
|
-
* SpanWeight
|
196
|
-
*
|
197
|
-
***************************************************************************/
|
198
|
-
|
199
|
-
extern Weight *spanw_create(Query *query, Searcher *searcher);
|
200
|
-
|
201
171
|
/***************************************************************************
|
202
172
|
*
|
203
173
|
* Query
|
204
174
|
*
|
205
175
|
***************************************************************************/
|
206
176
|
|
207
|
-
enum QUERY_TYPE
|
177
|
+
enum QUERY_TYPE
|
178
|
+
{
|
208
179
|
TERM_QUERY,
|
180
|
+
MULTI_TERM_QUERY,
|
209
181
|
BOOLEAN_QUERY,
|
210
182
|
PHRASE_QUERY,
|
211
|
-
MULTI_PHRASE_QUERY,
|
212
183
|
CONSTANT_QUERY,
|
213
184
|
FILTERED_QUERY,
|
214
185
|
MATCH_ALL_QUERY,
|
@@ -225,57 +196,53 @@ enum QUERY_TYPE {
|
|
225
196
|
|
226
197
|
struct Query
|
227
198
|
{
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
bool destroy_all : 1;
|
199
|
+
int ref_cnt;
|
200
|
+
float boost;
|
201
|
+
Weight *weight;
|
202
|
+
Query *(*rewrite)(Query *self, IndexReader *ir);
|
203
|
+
void (*extract_terms)(Query *self, HashSet *terms);
|
204
|
+
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
205
|
+
char *(*to_s)(Query *self, const char *field);
|
206
|
+
ulong (*hash)(Query *self);
|
207
|
+
int (*eq)(Query *self, Query *o);
|
208
|
+
void (*destroy_i)(Query *self);
|
209
|
+
Weight *(*create_weight_i)(Query *self, Searcher *searcher);
|
210
|
+
MatchVector *(*get_matchv_i)(Query *self, MatchVector *mv, TermVector *tv);
|
211
|
+
enum QUERY_TYPE type;
|
242
212
|
};
|
243
213
|
|
244
214
|
/* Internal Query Functions */
|
245
|
-
extern Query *q_create();
|
246
215
|
extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
|
247
216
|
extern void q_destroy_i(Query *self);
|
248
217
|
extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
|
249
218
|
|
250
|
-
|
251
219
|
extern void q_deref(Query *self);
|
220
|
+
extern const char *q_get_query_name(enum QUERY_TYPE type);
|
252
221
|
extern Weight *q_weight(Query *self, Searcher *searcher);
|
253
222
|
extern Query *q_combine(Query **queries, int q_cnt);
|
254
|
-
extern
|
223
|
+
extern ulong q_hash(Query *self);
|
255
224
|
extern int q_eq(Query *self, Query *o);
|
225
|
+
extern Query *q_create(size_t size);
|
226
|
+
#define q_new(type) q_create(sizeof(type))
|
256
227
|
|
257
228
|
/***************************************************************************
|
258
|
-
*
|
259
229
|
* TermQuery
|
260
|
-
*
|
261
230
|
***************************************************************************/
|
262
231
|
|
263
232
|
typedef struct TermQuery
|
264
233
|
{
|
265
|
-
|
234
|
+
Query super;
|
235
|
+
char *field;
|
236
|
+
char *term;
|
266
237
|
} TermQuery;
|
267
238
|
|
268
|
-
|
239
|
+
Query *tq_new(const char *field, const char *term);
|
269
240
|
|
270
241
|
/***************************************************************************
|
271
|
-
*
|
272
242
|
* BooleanQuery
|
273
|
-
*
|
274
243
|
***************************************************************************/
|
275
244
|
|
276
|
-
|
277
|
-
* BooleanClause
|
278
|
-
***************************************************************************/
|
245
|
+
/* *** BooleanClause *** */
|
279
246
|
|
280
247
|
enum BC_TYPE
|
281
248
|
{
|
@@ -284,22 +251,20 @@ enum BC_TYPE
|
|
284
251
|
BC_MUST_NOT
|
285
252
|
};
|
286
253
|
|
287
|
-
typedef struct BooleanClause
|
254
|
+
typedef struct BooleanClause
|
255
|
+
{
|
288
256
|
int ref_cnt;
|
289
257
|
Query *query;
|
290
|
-
Query *rewritten;
|
291
258
|
unsigned int occur : 4;
|
292
259
|
bool is_prohibited : 1;
|
293
260
|
bool is_required : 1;
|
294
261
|
} BooleanClause;
|
295
262
|
|
296
|
-
extern BooleanClause *
|
263
|
+
extern BooleanClause *bc_new(Query *query, enum BC_TYPE occur);
|
297
264
|
extern void bc_deref(BooleanClause *self);
|
298
|
-
extern void bc_set_occur(BooleanClause *self,
|
265
|
+
extern void bc_set_occur(BooleanClause *self, enum BC_TYPE occur);
|
299
266
|
|
300
|
-
|
301
|
-
* BooleanQuery
|
302
|
-
***************************************************************************/
|
267
|
+
/* *** BooleanQuery *** */
|
303
268
|
|
304
269
|
#define DEFAULT_MAX_CLAUSE_COUNT 1024
|
305
270
|
#define BOOLEAN_CLAUSES_START_CAPA 4
|
@@ -307,172 +272,184 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
|
|
307
272
|
|
308
273
|
typedef struct BooleanQuery
|
309
274
|
{
|
310
|
-
|
311
|
-
|
312
|
-
int
|
313
|
-
int
|
314
|
-
|
275
|
+
Query super;
|
276
|
+
bool coord_disabled;
|
277
|
+
int max_clause_cnt;
|
278
|
+
int clause_cnt;
|
279
|
+
int clause_capa;
|
280
|
+
float original_boost;
|
315
281
|
BooleanClause **clauses;
|
316
|
-
Similarity
|
282
|
+
Similarity *similarity;
|
317
283
|
} BooleanQuery;
|
318
284
|
|
319
|
-
extern Query *
|
285
|
+
extern Query *bq_new(bool coord_disabled);
|
320
286
|
extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
321
|
-
|
287
|
+
enum BC_TYPE occur);
|
288
|
+
extern BooleanClause *bq_add_query_nr(Query *self, Query *sub_query,
|
289
|
+
enum BC_TYPE occur);
|
322
290
|
extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
291
|
+
extern BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc);
|
323
292
|
|
324
293
|
/***************************************************************************
|
325
|
-
*
|
326
294
|
* PhraseQuery
|
327
|
-
*
|
328
295
|
***************************************************************************/
|
329
296
|
|
330
297
|
#define PHQ_INIT_CAPA 4
|
331
298
|
typedef struct PhraseQuery
|
332
299
|
{
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
int
|
338
|
-
|
300
|
+
Query super;
|
301
|
+
int slop;
|
302
|
+
char *field;
|
303
|
+
PhrasePosition *positions;
|
304
|
+
int pos_cnt;
|
305
|
+
int pos_capa;
|
339
306
|
} PhraseQuery;
|
340
307
|
|
341
|
-
extern Query *
|
342
|
-
extern void phq_add_term(Query *self,
|
308
|
+
extern Query *phq_new(const char *field);
|
309
|
+
extern void phq_add_term(Query *self, const char *term, int pos_inc);
|
310
|
+
extern void phq_add_term_abs(Query *self, const char *term, int position);
|
311
|
+
extern void phq_append_multi_term(Query *self, const char *term);
|
343
312
|
|
344
313
|
/***************************************************************************
|
345
|
-
*
|
346
|
-
* MultiPhraseQuery
|
347
|
-
*
|
314
|
+
* MultiTermQuery
|
348
315
|
***************************************************************************/
|
349
316
|
|
350
|
-
|
317
|
+
#define MULTI_TERM_QUERY_MAX_TERMS 256
|
318
|
+
typedef struct MultiTermQuery
|
351
319
|
{
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
int t_capa;
|
358
|
-
char *field;
|
359
|
-
} MultiPhraseQuery;
|
320
|
+
Query super;
|
321
|
+
char *field;
|
322
|
+
PriorityQueue *boosted_terms;
|
323
|
+
float min_boost;
|
324
|
+
} MultiTermQuery;
|
360
325
|
|
361
|
-
extern Query *
|
362
|
-
extern void
|
326
|
+
extern void multi_tq_add_term(Query *self, const char *term);
|
327
|
+
extern void multi_tq_add_term_boost(Query *self, const char *term, float boost);
|
328
|
+
extern Query *multi_tq_new(const char *field);
|
329
|
+
extern Query *multi_tq_new_conf(const char *field, int max_terms,
|
330
|
+
float min_boost);
|
331
|
+
|
332
|
+
#define MTQMaxTerms(query) (((MTQSubQuery *)(query))->max_terms)
|
333
|
+
typedef struct MTQSubQuery
|
334
|
+
{
|
335
|
+
Query super;
|
336
|
+
int max_terms;
|
337
|
+
} MTQSubQuery;
|
363
338
|
|
364
339
|
/***************************************************************************
|
365
|
-
*
|
366
340
|
* PrefixQuery
|
367
|
-
*
|
368
341
|
***************************************************************************/
|
369
342
|
|
370
|
-
|
343
|
+
#define PREFIX_QUERY_MAX_TERMS 256
|
344
|
+
|
345
|
+
|
346
|
+
typedef struct PrefixQuery
|
347
|
+
{
|
348
|
+
MTQSubQuery super;
|
349
|
+
char *field;
|
350
|
+
char *prefix;
|
351
|
+
} PrefixQuery;
|
352
|
+
|
353
|
+
extern Query *prefixq_new(const char *field, const char *prefix);
|
371
354
|
|
372
355
|
/***************************************************************************
|
373
|
-
*
|
374
356
|
* WildCardQuery
|
375
|
-
*
|
376
357
|
***************************************************************************/
|
377
358
|
|
378
359
|
#define WILD_CHAR '?'
|
379
360
|
#define WILD_STRING '*'
|
361
|
+
#define WILD_CARD_QUERY_MAX_TERMS 256
|
362
|
+
|
363
|
+
typedef struct WildCardQuery
|
364
|
+
{
|
365
|
+
MTQSubQuery super;
|
366
|
+
char *field;
|
367
|
+
char *pattern;
|
368
|
+
} WildCardQuery;
|
369
|
+
|
380
370
|
|
381
|
-
extern Query *
|
382
|
-
extern bool wc_match(char *pattern, char *text);
|
371
|
+
extern Query *wcq_new(const char *field, const char *pattern);
|
372
|
+
extern bool wc_match(const char *pattern, const char *text);
|
383
373
|
|
384
374
|
/***************************************************************************
|
385
|
-
*
|
386
375
|
* FuzzyQuery
|
387
|
-
*
|
388
376
|
***************************************************************************/
|
389
377
|
|
390
|
-
#define DEF_MIN_SIM 0.
|
378
|
+
#define DEF_MIN_SIM 0.5f
|
391
379
|
#define DEF_PRE_LEN 0
|
380
|
+
#define DEF_MAX_TERMS 256
|
392
381
|
#define TYPICAL_LONGEST_WORD 20
|
393
382
|
|
394
383
|
typedef struct FuzzyQuery
|
395
384
|
{
|
396
|
-
|
397
|
-
char
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
int
|
385
|
+
MTQSubQuery super;
|
386
|
+
char *field;
|
387
|
+
char *term;
|
388
|
+
const char *text; /* term text after prefix */
|
389
|
+
int text_len;
|
390
|
+
int pre_len;
|
391
|
+
float min_sim;
|
392
|
+
float scale_factor;
|
393
|
+
int max_distances[TYPICAL_LONGEST_WORD];
|
394
|
+
int *da;
|
405
395
|
} FuzzyQuery;
|
406
396
|
|
407
|
-
extern Query *
|
408
|
-
extern Query *
|
397
|
+
extern Query *fuzq_new(const char *term, const char *field);
|
398
|
+
extern Query *fuzq_new_conf(const char *field, const char *term,
|
399
|
+
float min_sim, int pre_len, int max_terms);
|
409
400
|
|
410
401
|
/***************************************************************************
|
411
|
-
*
|
412
402
|
* ConstantScoreQuery
|
413
|
-
*
|
414
403
|
***************************************************************************/
|
415
404
|
|
416
|
-
|
405
|
+
typedef struct ConstantScoreQuery
|
406
|
+
{
|
407
|
+
Query super;
|
408
|
+
Filter *filter;
|
409
|
+
} ConstantScoreQuery;
|
410
|
+
|
411
|
+
extern Query *csq_new(Filter *filter);
|
412
|
+
extern Query *csq_new_nr(Filter *filter);
|
417
413
|
|
418
414
|
/***************************************************************************
|
419
|
-
*
|
420
|
-
* FilteredQueryQuery
|
421
|
-
*
|
415
|
+
* FilteredQuery
|
422
416
|
***************************************************************************/
|
423
417
|
|
424
418
|
typedef struct FilteredQuery
|
425
419
|
{
|
426
|
-
Query
|
420
|
+
Query super;
|
421
|
+
Query *query;
|
427
422
|
Filter *filter;
|
428
423
|
} FilteredQuery;
|
429
424
|
|
430
|
-
extern Query *
|
425
|
+
extern Query *fq_new(Query *query, Filter *filter);
|
431
426
|
|
432
427
|
/***************************************************************************
|
433
|
-
*
|
434
428
|
* MatchAllQuery
|
435
|
-
*
|
436
429
|
***************************************************************************/
|
437
430
|
|
438
|
-
extern Query *
|
431
|
+
extern Query *maq_new();
|
439
432
|
|
440
433
|
/***************************************************************************
|
441
|
-
*
|
442
434
|
* RangeQuery
|
443
|
-
*
|
444
435
|
***************************************************************************/
|
445
436
|
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
} Range;
|
454
|
-
|
455
|
-
extern Query *rq_create(const char *field, char *lower_term,
|
456
|
-
char *upper_term, bool include_lower, bool include_upper);
|
457
|
-
extern Query *rq_create_less(const char *field, char *upper_term,
|
458
|
-
bool include_upper);
|
459
|
-
extern Query *rq_create_more(const char *field, char *lower_term,
|
460
|
-
bool include_lower);
|
437
|
+
extern Query *rq_new(const char *field, const char *lower_term,
|
438
|
+
const char *upper_term, bool include_lower,
|
439
|
+
bool include_upper);
|
440
|
+
extern Query *rq_new_less(const char *field, const char *upper_term,
|
441
|
+
bool include_upper);
|
442
|
+
extern Query *rq_new_more(const char *field, const char *lower_term,
|
443
|
+
bool include_lower);
|
461
444
|
|
462
445
|
/***************************************************************************
|
463
|
-
*
|
464
446
|
* SpanQuery
|
465
|
-
*
|
466
|
-
***************************************************************************/
|
467
|
-
|
468
|
-
/***************************************************************************
|
469
|
-
* SpanEnum
|
470
447
|
***************************************************************************/
|
471
448
|
|
449
|
+
/* ** SpanEnum ** */
|
472
450
|
typedef struct SpanEnum SpanEnum;
|
473
451
|
struct SpanEnum
|
474
452
|
{
|
475
|
-
void *data;
|
476
453
|
Query *query;
|
477
454
|
bool (*next)(SpanEnum *self);
|
478
455
|
bool (*skip_to)(SpanEnum *self, int target_doc);
|
@@ -483,107 +460,26 @@ struct SpanEnum
|
|
483
460
|
void (*destroy)(SpanEnum *self);
|
484
461
|
};
|
485
462
|
|
486
|
-
|
487
|
-
|
488
|
-
***************************************************************************/
|
489
|
-
|
490
|
-
typedef struct SpanTermEnum SpanTermEnum;
|
491
|
-
struct SpanTermEnum
|
492
|
-
{
|
493
|
-
TermDocEnum *positions;
|
494
|
-
int position;
|
495
|
-
int doc;
|
496
|
-
int count;
|
497
|
-
int freq;
|
498
|
-
};
|
499
|
-
|
500
|
-
extern SpanEnum *spante_create(Query *query, IndexReader *ir);
|
501
|
-
|
502
|
-
/***************************************************************************
|
503
|
-
* SpanFirstEnum
|
504
|
-
***************************************************************************/
|
505
|
-
|
506
|
-
extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
507
|
-
|
508
|
-
/***************************************************************************
|
509
|
-
* SpanOrEnum
|
510
|
-
***************************************************************************/
|
511
|
-
|
512
|
-
typedef struct SpanOrEnum
|
513
|
-
{
|
514
|
-
PriorityQueue *queue;
|
515
|
-
SpanEnum **span_enums;
|
516
|
-
int s_cnt;
|
517
|
-
bool first_time;
|
518
|
-
} SpanOrEnum;
|
519
|
-
|
520
|
-
extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
521
|
-
|
522
|
-
/***************************************************************************
|
523
|
-
* SpanEnumCell
|
524
|
-
***************************************************************************/
|
525
|
-
|
526
|
-
typedef struct SpanEnumCell
|
527
|
-
{
|
528
|
-
SpanEnum *parent;
|
529
|
-
SpanEnum *se;
|
530
|
-
int index;
|
531
|
-
int length;
|
532
|
-
} SpanEnumCell;
|
533
|
-
|
534
|
-
extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
535
|
-
|
536
|
-
/***************************************************************************
|
537
|
-
* SpanNearEnum
|
538
|
-
***************************************************************************/
|
539
|
-
|
540
|
-
typedef struct SpanNearEnum
|
541
|
-
{
|
542
|
-
SpanEnum **span_enums;
|
543
|
-
int s_cnt;
|
544
|
-
int slop;
|
545
|
-
int current;
|
546
|
-
bool first_time : 1;
|
547
|
-
bool in_order : 1;
|
548
|
-
int doc;
|
549
|
-
int start;
|
550
|
-
int end;
|
551
|
-
} SpanNearEnum;
|
552
|
-
|
553
|
-
extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
554
|
-
|
555
|
-
/***************************************************************************
|
556
|
-
* SpanNotEnum
|
557
|
-
***************************************************************************/
|
558
|
-
|
559
|
-
typedef struct SpanNotEnum
|
463
|
+
/* ** SpanQuery ** */
|
464
|
+
typedef struct SpanQuery
|
560
465
|
{
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
}
|
566
|
-
|
567
|
-
extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
466
|
+
Query super;
|
467
|
+
char *field;
|
468
|
+
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
469
|
+
HashSet *(*get_terms)(Query *self);
|
470
|
+
} SpanQuery;
|
568
471
|
|
569
472
|
/***************************************************************************
|
570
|
-
*
|
473
|
+
* SpanTermQuery
|
571
474
|
***************************************************************************/
|
572
475
|
|
573
|
-
typedef struct
|
574
|
-
struct SpanQuery
|
476
|
+
typedef struct SpanTermQuery
|
575
477
|
{
|
576
|
-
|
577
|
-
char
|
578
|
-
|
579
|
-
|
580
|
-
};
|
478
|
+
SpanQuery super;
|
479
|
+
char *term;
|
480
|
+
} SpanTermQuery;
|
481
|
+
extern Query *spantq_new(const char *field, const char *term);
|
581
482
|
|
582
|
-
/***************************************************************************
|
583
|
-
* SpanTermQuery
|
584
|
-
***************************************************************************/
|
585
|
-
|
586
|
-
extern Query *spantq_create(Term *term);
|
587
483
|
|
588
484
|
/***************************************************************************
|
589
485
|
* SpanFirstQuery
|
@@ -591,11 +487,13 @@ extern Query *spantq_create(Term *term);
|
|
591
487
|
|
592
488
|
typedef struct SpanFirstQuery
|
593
489
|
{
|
594
|
-
|
595
|
-
|
490
|
+
SpanQuery super;
|
491
|
+
int end;
|
492
|
+
Query *match;
|
596
493
|
} SpanFirstQuery;
|
597
494
|
|
598
|
-
extern Query *
|
495
|
+
extern Query *spanfq_new(Query *match, int end);
|
496
|
+
extern Query *spanfq_new_nr(Query *match, int end);
|
599
497
|
|
600
498
|
/***************************************************************************
|
601
499
|
* SpanOrQuery
|
@@ -603,11 +501,15 @@ extern Query *spanfq_create(Query *match, int end);
|
|
603
501
|
|
604
502
|
typedef struct SpanOrQuery
|
605
503
|
{
|
606
|
-
|
607
|
-
|
504
|
+
SpanQuery super;
|
505
|
+
Query **clauses;
|
506
|
+
int c_cnt;
|
507
|
+
int c_capa;
|
608
508
|
} SpanOrQuery;
|
609
509
|
|
610
|
-
extern Query *
|
510
|
+
extern Query *spanoq_new();
|
511
|
+
extern Query *spanoq_add_clause(Query *self, Query *clause);
|
512
|
+
extern Query *spanoq_add_clause_nr(Query *self, Query *clause);
|
611
513
|
|
612
514
|
/***************************************************************************
|
613
515
|
* SpanNearQuery
|
@@ -615,15 +517,17 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
|
|
615
517
|
|
616
518
|
typedef struct SpanNearQuery
|
617
519
|
{
|
618
|
-
|
619
|
-
|
620
|
-
int
|
621
|
-
|
520
|
+
SpanQuery super;
|
521
|
+
Query **clauses;
|
522
|
+
int c_cnt;
|
523
|
+
int c_capa;
|
524
|
+
int slop;
|
525
|
+
bool in_order : 1;
|
622
526
|
} SpanNearQuery;
|
623
527
|
|
624
|
-
extern Query *
|
625
|
-
|
626
|
-
|
528
|
+
extern Query *spannq_new(int slop, bool in_order);
|
529
|
+
extern Query *spannq_add_clause(Query *self, Query *clause);
|
530
|
+
extern Query *spannq_add_clause_nr(Query *self, Query *clause);
|
627
531
|
|
628
532
|
/***************************************************************************
|
629
533
|
* SpanNotQuery
|
@@ -631,11 +535,15 @@ extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
|
631
535
|
|
632
536
|
typedef struct SpanNotQuery
|
633
537
|
{
|
634
|
-
|
635
|
-
Query
|
538
|
+
SpanQuery super;
|
539
|
+
Query *inc;
|
540
|
+
Query *exc;
|
636
541
|
} SpanNotQuery;
|
637
542
|
|
638
|
-
extern Query *
|
543
|
+
extern Query *spanxq_new(Query *inc, Query *exc);
|
544
|
+
extern Query *spanxq_new_nr(Query *inc, Query *exc);
|
545
|
+
|
546
|
+
|
639
547
|
|
640
548
|
/***************************************************************************
|
641
549
|
*
|
@@ -643,13 +551,15 @@ extern Query *spanxq_create(Query *inc, Query *exc);
|
|
643
551
|
*
|
644
552
|
***************************************************************************/
|
645
553
|
|
646
|
-
#define SCORER_NULLIFY(mscorer)
|
554
|
+
#define SCORER_NULLIFY(mscorer) do {\
|
555
|
+
(mscorer)->destroy(mscorer);\
|
556
|
+
(mscorer) = NULL;\
|
557
|
+
} while (0)
|
647
558
|
|
648
559
|
struct Scorer
|
649
560
|
{
|
650
|
-
void *data;
|
651
561
|
Similarity *similarity;
|
652
|
-
int
|
562
|
+
int doc;
|
653
563
|
float (*score)(Scorer *self);
|
654
564
|
bool (*next)(Scorer *self);
|
655
565
|
bool (*skip_to)(Scorer *self, int doc_num);
|
@@ -657,300 +567,80 @@ struct Scorer
|
|
657
567
|
void (*destroy)(Scorer *self);
|
658
568
|
};
|
659
569
|
|
570
|
+
#define scorer_new(type, similarity) scorer_create(sizeof(type), similarity)
|
660
571
|
/* Internal Scorer Function */
|
661
572
|
extern void scorer_destroy_i(Scorer *self);
|
662
|
-
|
663
|
-
extern Scorer *scorer_create(Similarity *similarity);
|
573
|
+
extern Scorer *scorer_create(size_t size, Similarity *similarity);
|
664
574
|
extern bool scorer_less_than(void *p1, void *p2);
|
665
|
-
extern bool scorer_doc_less_than(
|
575
|
+
extern bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2);
|
666
576
|
extern int scorer_doc_cmp(const void *p1, const void *p2);
|
667
577
|
|
668
|
-
/***************************************************************************
|
669
|
-
*
|
670
|
-
* TermScorer
|
671
|
-
*
|
672
|
-
***************************************************************************/
|
673
|
-
|
674
|
-
#define SCORE_CACHE_SIZE 32
|
675
|
-
#define TDE_READ_SIZE 32
|
676
|
-
|
677
|
-
typedef struct TermScorer
|
678
|
-
{
|
679
|
-
int docs[TDE_READ_SIZE];
|
680
|
-
int freqs[TDE_READ_SIZE];
|
681
|
-
int pointer;
|
682
|
-
int pointer_max;
|
683
|
-
float score_cache[SCORE_CACHE_SIZE];
|
684
|
-
Weight *weight;
|
685
|
-
TermDocEnum *tde;
|
686
|
-
uchar *norms;
|
687
|
-
float weight_value;
|
688
|
-
} TermScorer;
|
689
|
-
|
690
|
-
extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
691
|
-
|
692
|
-
/***************************************************************************
|
693
|
-
*
|
694
|
-
* BooleanScorer
|
695
|
-
*
|
696
|
-
***************************************************************************/
|
697
|
-
|
698
|
-
/***************************************************************************
|
699
|
-
* Coordinator
|
700
|
-
***************************************************************************/
|
701
|
-
|
702
|
-
typedef struct Coordinator
|
703
|
-
{
|
704
|
-
int max_coord;
|
705
|
-
float *coord_factors;
|
706
|
-
Similarity *similarity;
|
707
|
-
int num_matches;
|
708
|
-
} Coordinator;
|
709
|
-
|
710
|
-
/***************************************************************************
|
711
|
-
* DisjunctionSumScorer
|
712
|
-
***************************************************************************/
|
713
|
-
|
714
|
-
typedef struct DisjunctionSumScorer
|
715
|
-
{
|
716
|
-
float cum_score;
|
717
|
-
int num_matches;
|
718
|
-
int min_num_matches;
|
719
|
-
Scorer **sub_scorers;
|
720
|
-
int ss_cnt;
|
721
|
-
PriorityQueue *scorer_queue;
|
722
|
-
Coordinator *coordinator;
|
723
|
-
} DisjunctionSumScorer;
|
724
|
-
|
725
|
-
/***************************************************************************
|
726
|
-
* ConjunctionScorer
|
727
|
-
***************************************************************************/
|
728
|
-
|
729
|
-
typedef struct ConjunctionScorer
|
730
|
-
{
|
731
|
-
bool first_time : 1;
|
732
|
-
bool more : 1;
|
733
|
-
float coord;
|
734
|
-
int ss_cnt;
|
735
|
-
int ss_capa;
|
736
|
-
Scorer **sub_scorers;
|
737
|
-
int first;
|
738
|
-
int last;
|
739
|
-
Coordinator *coordinator;
|
740
|
-
int last_scored_doc;
|
741
|
-
} ConjunctionScorer;
|
742
|
-
|
743
|
-
/***************************************************************************
|
744
|
-
* SingleMatchScorer
|
745
|
-
***************************************************************************/
|
746
|
-
|
747
|
-
typedef struct SingleMatchScorer
|
748
|
-
{
|
749
|
-
Coordinator *coordinator;
|
750
|
-
Scorer *scorer;
|
751
|
-
} SingleMatchScorer;
|
752
|
-
|
753
|
-
/***************************************************************************
|
754
|
-
* ReqOptSumScorer
|
755
|
-
***************************************************************************/
|
756
|
-
|
757
|
-
typedef struct ReqOptSumScorer
|
758
|
-
{
|
759
|
-
Scorer *req_scorer;
|
760
|
-
Scorer *opt_scorer;
|
761
|
-
bool first_time_opt;
|
762
|
-
} ReqOptSumScorer;
|
763
|
-
|
764
|
-
/***************************************************************************
|
765
|
-
* ReqExclScorer
|
766
|
-
***************************************************************************/
|
767
|
-
|
768
|
-
typedef struct ReqExclScorer
|
769
|
-
{
|
770
|
-
Scorer *req_scorer;
|
771
|
-
Scorer *excl_scorer;
|
772
|
-
bool first_time;
|
773
|
-
} ReqExclScorer;
|
774
|
-
|
775
|
-
/***************************************************************************
|
776
|
-
* BooleanScorer
|
777
|
-
***************************************************************************/
|
778
|
-
|
779
|
-
typedef struct BooleanScorer
|
780
|
-
{
|
781
|
-
Scorer **required_scorers;
|
782
|
-
int rs_cnt;
|
783
|
-
int rs_capa;
|
784
|
-
Scorer **optional_scorers;
|
785
|
-
int os_cnt;
|
786
|
-
int os_capa;
|
787
|
-
Scorer **prohibited_scorers;
|
788
|
-
int ps_cnt;
|
789
|
-
int ps_capa;
|
790
|
-
Scorer *counting_sum_scorer;
|
791
|
-
Coordinator *coordinator;
|
792
|
-
} BooleanScorer;
|
793
|
-
|
794
|
-
extern Scorer *bsc_create(Similarity *similarity);
|
795
|
-
extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
796
|
-
|
797
|
-
/***************************************************************************
|
798
|
-
*
|
799
|
-
* PhraseScorer
|
800
|
-
*
|
801
|
-
***************************************************************************/
|
802
|
-
|
803
|
-
/***************************************************************************
|
804
|
-
* PhrasePosition
|
805
|
-
***************************************************************************/
|
806
|
-
typedef struct PhrasePosition
|
807
|
-
{
|
808
|
-
TermDocEnum *tpe;
|
809
|
-
int offset;
|
810
|
-
int count;
|
811
|
-
int doc;
|
812
|
-
int position;
|
813
|
-
} PhrasePosition;
|
814
|
-
|
815
|
-
extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
816
|
-
|
817
|
-
/***************************************************************************
|
818
|
-
* PhraseScorer
|
819
|
-
***************************************************************************/
|
820
|
-
|
821
|
-
typedef struct PhraseScorer
|
822
|
-
{
|
823
|
-
float freq;
|
824
|
-
uchar *norms;
|
825
|
-
float value;
|
826
|
-
Weight *weight;
|
827
|
-
bool first_time : 1;
|
828
|
-
bool more : 1;
|
829
|
-
int pp_first;
|
830
|
-
int pp_last;
|
831
|
-
int pp_cnt;
|
832
|
-
PhrasePosition **phrase_pos;
|
833
|
-
float (*phrase_freq)(Scorer *self);
|
834
|
-
int slop;
|
835
|
-
} PhraseScorer;
|
836
|
-
|
837
|
-
extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
838
|
-
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
839
|
-
|
840
|
-
/***************************************************************************
|
841
|
-
* ExactPhraseScorer
|
842
|
-
***************************************************************************/
|
843
|
-
|
844
|
-
extern Scorer *exact_phrase_scorer_create(Weight *weight,
|
845
|
-
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
846
|
-
Similarity *similarity, uchar *norms);
|
847
|
-
|
848
|
-
/***************************************************************************
|
849
|
-
* SloppyPhraseScorer
|
850
|
-
***************************************************************************/
|
851
|
-
|
852
|
-
extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
853
|
-
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
854
|
-
Similarity *similarity, int slop, uchar *norms);
|
855
|
-
|
856
|
-
/***************************************************************************
|
857
|
-
*
|
858
|
-
* ConstantScoreScorer
|
859
|
-
*
|
860
|
-
***************************************************************************/
|
861
|
-
|
862
|
-
typedef struct ConstantScoreScorer
|
863
|
-
{
|
864
|
-
BitVector *bv;
|
865
|
-
float score;
|
866
|
-
} ConstantScoreScorer;
|
867
|
-
|
868
|
-
extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
869
|
-
|
870
|
-
|
871
|
-
/***************************************************************************
|
872
|
-
*
|
873
|
-
* MatchAllScorer
|
874
|
-
*
|
875
|
-
***************************************************************************/
|
876
|
-
|
877
|
-
typedef struct MatchAllScorer
|
878
|
-
{
|
879
|
-
IndexReader *ir;
|
880
|
-
int max_doc;
|
881
|
-
float score;
|
882
|
-
} MatchAllScorer;
|
883
|
-
|
884
|
-
extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
885
|
-
|
886
|
-
|
887
|
-
/***************************************************************************
|
888
|
-
*
|
889
|
-
* SpanScorer
|
890
|
-
*
|
891
|
-
***************************************************************************/
|
892
|
-
|
893
|
-
typedef struct SpanScorer
|
894
|
-
{
|
895
|
-
bool first_time : 1;
|
896
|
-
bool more : 1;
|
897
|
-
IndexReader *ir;
|
898
|
-
SpanEnum *spans;
|
899
|
-
Similarity *sim;
|
900
|
-
uchar *norms;
|
901
|
-
Weight *weight;
|
902
|
-
float value;
|
903
|
-
float freq;
|
904
|
-
} SpanScorer;
|
905
|
-
|
906
|
-
extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
907
|
-
|
908
578
|
/***************************************************************************
|
909
579
|
*
|
910
580
|
* Sort
|
911
581
|
*
|
912
582
|
***************************************************************************/
|
913
583
|
|
914
|
-
enum SORT_TYPE
|
584
|
+
enum SORT_TYPE
|
585
|
+
{
|
915
586
|
SORT_TYPE_SCORE,
|
916
587
|
SORT_TYPE_DOC,
|
588
|
+
SORT_TYPE_BYTE,
|
917
589
|
SORT_TYPE_INTEGER,
|
918
590
|
SORT_TYPE_FLOAT,
|
919
591
|
SORT_TYPE_STRING,
|
920
592
|
SORT_TYPE_AUTO
|
921
593
|
};
|
922
594
|
|
595
|
+
/***************************************************************************
|
596
|
+
* Comparable
|
597
|
+
***************************************************************************/
|
598
|
+
|
599
|
+
typedef struct Comparable
|
600
|
+
{
|
601
|
+
int type;
|
602
|
+
union {
|
603
|
+
int i;
|
604
|
+
float f;
|
605
|
+
char *s;
|
606
|
+
void *p;
|
607
|
+
} val;
|
608
|
+
bool reverse : 1;
|
609
|
+
} Comparable;
|
610
|
+
|
923
611
|
/***************************************************************************
|
924
612
|
* SortField
|
925
613
|
***************************************************************************/
|
926
614
|
|
927
615
|
typedef struct SortField
|
928
616
|
{
|
929
|
-
mutex_t
|
930
|
-
char
|
931
|
-
|
932
|
-
bool
|
933
|
-
void
|
934
|
-
int
|
935
|
-
void
|
936
|
-
void
|
937
|
-
void
|
617
|
+
mutex_t mutex;
|
618
|
+
char *field;
|
619
|
+
enum SORT_TYPE type;
|
620
|
+
bool reverse : 1;
|
621
|
+
void *index;
|
622
|
+
int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
|
623
|
+
void (*get_val)(void *index_ptr, Hit *hit1, Comparable *comparable);
|
624
|
+
void *(*create_index)(int size);
|
625
|
+
void (*destroy_index)(void *p);
|
626
|
+
void (*handle_term)(void *index, TermDocEnum *tde, char *text);
|
938
627
|
} SortField;
|
939
628
|
|
940
|
-
extern SortField *
|
941
|
-
extern SortField *
|
942
|
-
extern SortField *
|
943
|
-
extern SortField *
|
944
|
-
extern SortField *
|
945
|
-
extern SortField *
|
946
|
-
extern SortField *
|
629
|
+
extern SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse);
|
630
|
+
extern SortField *sort_field_score_new(bool reverse);
|
631
|
+
extern SortField *sort_field_doc_new(bool reverse);
|
632
|
+
extern SortField *sort_field_int_new(char *field, bool reverse);
|
633
|
+
extern SortField *sort_field_byte_new(char *field, bool reverse);
|
634
|
+
extern SortField *sort_field_float_new(char *field, bool reverse);
|
635
|
+
extern SortField *sort_field_string_new(char *field, bool reverse);
|
636
|
+
extern SortField *sort_field_auto_new(char *field, bool reverse);
|
947
637
|
extern void sort_field_destroy(void *p);
|
948
638
|
extern char *sort_field_to_s(SortField *self);
|
949
639
|
|
950
|
-
extern SortField SORT_FIELD_SCORE;
|
951
|
-
extern SortField SORT_FIELD_SCORE_REV;
|
952
|
-
extern SortField SORT_FIELD_DOC;
|
953
|
-
extern SortField SORT_FIELD_DOC_REV;
|
640
|
+
extern const SortField SORT_FIELD_SCORE;
|
641
|
+
extern const SortField SORT_FIELD_SCORE_REV;
|
642
|
+
extern const SortField SORT_FIELD_DOC;
|
643
|
+
extern const SortField SORT_FIELD_DOC_REV;
|
954
644
|
|
955
645
|
/***************************************************************************
|
956
646
|
* Sort
|
@@ -959,12 +649,13 @@ extern SortField SORT_FIELD_DOC_REV;
|
|
959
649
|
typedef struct Sort
|
960
650
|
{
|
961
651
|
SortField **sort_fields;
|
962
|
-
int
|
963
|
-
int
|
652
|
+
int size;
|
653
|
+
int capa;
|
654
|
+
int start;
|
964
655
|
bool destroy_all : 1;
|
965
656
|
} Sort;
|
966
657
|
|
967
|
-
extern Sort *
|
658
|
+
extern Sort *sort_new();
|
968
659
|
extern void sort_destroy(void *p);
|
969
660
|
extern void sort_add_sort_field(Sort *self, SortField *sf);
|
970
661
|
extern void sort_clear(Sort *self);
|
@@ -978,7 +669,27 @@ extern Hit *fshq_pq_pop(PriorityQueue *pq);
|
|
978
669
|
extern void fshq_pq_down(PriorityQueue *pq);
|
979
670
|
extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
|
980
671
|
extern void fshq_pq_destroy(PriorityQueue *pq);
|
981
|
-
extern PriorityQueue *
|
672
|
+
extern PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir);
|
673
|
+
extern Hit *fshq_pq_pop_fd(PriorityQueue *pq);
|
674
|
+
|
675
|
+
/***************************************************************************
|
676
|
+
* FieldDoc
|
677
|
+
***************************************************************************/
|
678
|
+
|
679
|
+
typedef struct FieldDoc
|
680
|
+
{
|
681
|
+
Hit hit;
|
682
|
+
int size;
|
683
|
+
Comparable comparables[];
|
684
|
+
} FieldDoc;
|
685
|
+
|
686
|
+
extern void fd_destroy(FieldDoc *fd);
|
687
|
+
|
688
|
+
/***************************************************************************
|
689
|
+
* FieldDocSortedHitQueue
|
690
|
+
***************************************************************************/
|
691
|
+
|
692
|
+
extern bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2);
|
982
693
|
|
983
694
|
/***************************************************************************
|
984
695
|
*
|
@@ -986,47 +697,91 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
986
697
|
*
|
987
698
|
***************************************************************************/
|
988
699
|
|
989
|
-
|
990
|
-
|
991
|
-
|
700
|
+
typedef bool (*filter_ft)(int doc_num, float score, Searcher *self);
|
701
|
+
|
702
|
+
struct Searcher
|
703
|
+
{
|
992
704
|
Similarity *similarity;
|
993
|
-
|
994
|
-
|
995
|
-
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
|
705
|
+
int (*doc_freq)(Searcher *self, const char *field,
|
706
|
+
const char *term);
|
996
707
|
Document *(*get_doc)(Searcher *self, int doc_num);
|
708
|
+
LazyDoc *(*get_lazy_doc)(Searcher *self, int doc_num);
|
997
709
|
int (*max_doc)(Searcher *self);
|
998
710
|
Weight *(*create_weight)(Searcher *self, Query *query);
|
999
711
|
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
1000
|
-
int num_docs, Filter *filter, Sort *sort
|
712
|
+
int num_docs, Filter *filter, Sort *sort,
|
713
|
+
filter_ft filter_func,
|
714
|
+
bool load_fields);
|
715
|
+
TopDocs *(*search_w)(Searcher *self, Weight *weight, int first_doc,
|
716
|
+
int num_docs, Filter *filter, Sort *sort,
|
717
|
+
filter_ft filter_func,
|
718
|
+
bool load_fields);
|
1001
719
|
void (*search_each)(Searcher *self, Query *query, Filter *filter,
|
1002
|
-
|
720
|
+
filter_ft filter_func,
|
721
|
+
void (*fn)(Searcher *, int, float, void *),
|
722
|
+
void *arg);
|
1003
723
|
void (*search_each_w)(Searcher *self, Weight *weight,
|
1004
|
-
Filter *filter,
|
724
|
+
Filter *filter,
|
725
|
+
filter_ft filter_func,
|
726
|
+
void (*fn)(Searcher *, int, float, void *),
|
1005
727
|
void *arg);
|
1006
728
|
Query *(*rewrite)(Searcher *self, Query *original);
|
1007
729
|
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
1008
730
|
Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
|
731
|
+
TermVector *(*get_term_vector)(Searcher *self, const int doc_num,
|
732
|
+
const char *field);
|
1009
733
|
Similarity *(*get_similarity)(Searcher *self);
|
1010
734
|
void (*close)(Searcher *self);
|
735
|
+
void *arg; /* used to pass values to Searcher functions */
|
1011
736
|
};
|
1012
737
|
|
1013
|
-
#define
|
1014
|
-
#define
|
1015
|
-
#define
|
1016
|
-
#define
|
1017
|
-
#define
|
1018
|
-
|
1019
|
-
#define
|
1020
|
-
|
1021
|
-
#define
|
1022
|
-
|
1023
|
-
|
1024
|
-
#define
|
1025
|
-
|
1026
|
-
#define
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
738
|
+
#define searcher_doc_freq(s, t) s->doc_freq(s, t)
|
739
|
+
#define searcher_get_doc(s, dn) s->get_doc(s, dn)
|
740
|
+
#define searcher_get_lazy_doc(s, dn) s->get_lazy_doc(s, dn)
|
741
|
+
#define searcher_max_doc(s) s->max_doc(s)
|
742
|
+
#define searcher_rewrite(s, q) s->rewrite(s, q)
|
743
|
+
#define searcher_explain(s, q, dn) s->explain(s, q, dn)
|
744
|
+
#define searcher_explain_w(s, q, dn) s->explain_w(s, q, dn)
|
745
|
+
#define searcher_get_similarity(s) s->get_similarity(s)
|
746
|
+
#define searcher_close(s) s->close(s)
|
747
|
+
#define searcher_search(s, q, fd, nd, filt, sort, ff)\
|
748
|
+
s->search(s, q, fd, nd, filt, sort, ff, false)
|
749
|
+
#define searcher_search_fd(s, q, fd, nd, filt, sort, ff)\
|
750
|
+
s->search(s, q, fd, nd, filt, sort, ff, true)
|
751
|
+
#define searcher_search_each(s, q, filt, ff, fn, arg)\
|
752
|
+
s->search_each(s, q, filt, ff, fn, arg)
|
753
|
+
#define searcher_search_each_w(s, q, filt, ff, fn, arg)\
|
754
|
+
s->search_each_w(s, q, filt, ff, fn, arg)
|
755
|
+
|
756
|
+
|
757
|
+
extern MatchVector *searcher_get_match_vector(Searcher *self,
|
758
|
+
Query *query,
|
759
|
+
const int doc_num,
|
760
|
+
const char *field);
|
761
|
+
extern char **searcher_highlight(Searcher *self,
|
762
|
+
Query *query,
|
763
|
+
const int doc_num,
|
764
|
+
const char *field,
|
765
|
+
const int excerpt_len,
|
766
|
+
const int num_excerpts,
|
767
|
+
const char *pre_tag,
|
768
|
+
const char *post_tag,
|
769
|
+
const char *ellipsis);
|
770
|
+
|
771
|
+
/***************************************************************************
|
772
|
+
*
|
773
|
+
* IndexSearcher
|
774
|
+
*
|
775
|
+
***************************************************************************/
|
776
|
+
|
777
|
+
typedef struct IndexSearcher {
|
778
|
+
Searcher super;
|
779
|
+
IndexReader *ir;
|
780
|
+
bool close_ir : 1;
|
781
|
+
} IndexSearcher;
|
782
|
+
|
783
|
+
extern Searcher *isea_new(IndexReader *ir);
|
784
|
+
extern int isea_doc_freq(Searcher *self, const char *field, const char *term);
|
1030
785
|
|
1031
786
|
/***************************************************************************
|
1032
787
|
*
|
@@ -1036,15 +791,15 @@ extern Searcher *sea_create(IndexReader *ir);
|
|
1036
791
|
|
1037
792
|
typedef struct MultiSearcher
|
1038
793
|
{
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
int
|
1043
|
-
|
794
|
+
Searcher super;
|
795
|
+
int s_cnt;
|
796
|
+
Searcher **searchers;
|
797
|
+
int *starts;
|
798
|
+
int max_doc;
|
799
|
+
bool close_subs : 1;
|
1044
800
|
} MultiSearcher;
|
1045
801
|
|
1046
|
-
extern Searcher *
|
1047
|
-
bool close_subs);
|
802
|
+
extern Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs);
|
1048
803
|
|
1049
804
|
/***************************************************************************
|
1050
805
|
*
|
@@ -1052,93 +807,39 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
|
1052
807
|
*
|
1053
808
|
***************************************************************************/
|
1054
809
|
|
1055
|
-
#define
|
810
|
+
#define QP_CONC_WORDS 2
|
811
|
+
#define QP_MAX_CLAUSES 512
|
1056
812
|
|
1057
813
|
typedef struct QParser
|
1058
814
|
{
|
1059
815
|
mutex_t mutex;
|
1060
|
-
bool or_default : 1;
|
1061
|
-
bool wild_lower : 1;
|
1062
|
-
bool clean_str : 1;
|
1063
|
-
bool handle_parse_errors : 1;
|
1064
|
-
bool allow_any_fields : 1;
|
1065
|
-
bool close_def_fields : 1;
|
1066
816
|
int def_slop;
|
817
|
+
int max_clauses;
|
818
|
+
int phq_pos_inc;
|
1067
819
|
char *qstr;
|
1068
820
|
char *qstrp;
|
1069
|
-
char buf[
|
821
|
+
char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
|
1070
822
|
int buf_index;
|
823
|
+
HashTable *field_cache;
|
1071
824
|
HashSet *fields;
|
1072
825
|
HashSet *fields_buf;
|
1073
826
|
HashSet *def_fields;
|
1074
827
|
HashSet *all_fields;
|
1075
828
|
Analyzer *analyzer;
|
829
|
+
HashTable *ts_cache;
|
1076
830
|
Query *result;
|
831
|
+
bool or_default : 1;
|
832
|
+
bool wild_lower : 1;
|
833
|
+
bool clean_str : 1;
|
834
|
+
bool handle_parse_errors : 1;
|
835
|
+
bool allow_any_fields : 1;
|
836
|
+
bool close_def_fields : 1;
|
1077
837
|
} QParser;
|
1078
838
|
|
1079
|
-
extern QParser *
|
1080
|
-
|
839
|
+
extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
840
|
+
Analyzer *analyzer);
|
1081
841
|
extern void qp_destroy(QParser *self);
|
1082
842
|
extern Query *qp_parse(QParser *self, char *qstr);
|
1083
843
|
extern char *qp_clean_str(char *str);
|
1084
844
|
|
1085
|
-
/***************************************************************************
|
1086
|
-
*
|
1087
|
-
* Index
|
1088
|
-
*
|
1089
|
-
***************************************************************************/
|
1090
|
-
|
1091
|
-
typedef struct Index
|
1092
|
-
{
|
1093
|
-
mutex_t mutex;
|
1094
|
-
Store *store;
|
1095
|
-
Analyzer *analyzer;
|
1096
|
-
IndexReader *ir;
|
1097
|
-
IndexWriter *iw;
|
1098
|
-
Searcher *sea;
|
1099
|
-
QParser *qp;
|
1100
|
-
HashSet *key;
|
1101
|
-
char *id_field;
|
1102
|
-
char *def_field;
|
1103
|
-
/* for IndexWriter */
|
1104
|
-
bool use_compound_file : 1;
|
1105
|
-
bool auto_flush : 1;
|
1106
|
-
bool has_writes : 1;
|
1107
|
-
bool check_latest : 1;
|
1108
|
-
} Index;
|
1109
|
-
|
1110
|
-
extern Index *index_create(Store *store, Analyzer *analyzer,
|
1111
|
-
HashSet *def_fields, bool create);
|
1112
|
-
extern void index_destroy(Index *self);
|
1113
|
-
extern void index_flush(Index *self);
|
1114
|
-
extern int index_size(Index *self);
|
1115
|
-
extern void index_optimize(Index *self);
|
1116
|
-
extern bool index_has_del(Index *self);
|
1117
|
-
extern bool index_is_deleted(Index *self, int doc_num);
|
1118
|
-
extern void index_add_doc(Index *self, Document *doc);
|
1119
|
-
extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
1120
|
-
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1121
|
-
extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1122
|
-
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1123
|
-
int num_docs, Filter *filter, Sort *sort);
|
1124
|
-
extern Query *index_get_query(Index *self, char *qstr);
|
1125
|
-
extern Document *index_get_doc(Index *self, int doc_num);
|
1126
|
-
extern Document *index_get_doc_ts(Index *self, int doc_num);
|
1127
|
-
extern Document *index_get_doc_id(Index *self, char *id);
|
1128
|
-
extern Document *index_get_doc_term(Index *self, Term *term);
|
1129
|
-
extern void index_delete(Index *self, int doc_num);
|
1130
|
-
extern void index_delete_term(Index *self, Term *term);
|
1131
|
-
extern void index_delete_id(Index *self, char *id);
|
1132
|
-
extern void index_delete_query(Index *self, Query *q, Filter *f);
|
1133
|
-
extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
|
1134
|
-
extern int index_term_id(Index *self, Term *term);
|
1135
|
-
extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
1136
|
-
extern void index_auto_flush_ir(Index *self);
|
1137
|
-
extern void index_auto_flush_iw(Index *self);
|
1138
|
-
|
1139
|
-
extern inline void ensure_searcher_open(Index *self);
|
1140
|
-
extern inline void ensure_reader_open(Index *self);
|
1141
|
-
extern inline void ensure_writer_open(Index *self);
|
1142
|
-
|
1143
845
|
#endif
|
1144
|
-
|