ferret 0.9.6 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/search.h
CHANGED
@@ -9,9 +9,6 @@ typedef struct Scorer Scorer;
|
|
9
9
|
#include "bitvector.h"
|
10
10
|
#include "similarity.h"
|
11
11
|
|
12
|
-
#define term_set_create() \
|
13
|
-
hs_create((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
|
14
|
-
|
15
12
|
/***************************************************************************
|
16
13
|
*
|
17
14
|
* Explanation
|
@@ -24,15 +21,45 @@ typedef struct Explanation
|
|
24
21
|
float value;
|
25
22
|
char *description;
|
26
23
|
struct Explanation **details;
|
27
|
-
int dcnt;
|
28
|
-
int dcapa;
|
29
24
|
} Explanation;
|
25
|
+
|
26
|
+
extern Explanation *expl_new(float value, const char *description, ...);
|
27
|
+
extern void expl_destroy(Explanation *expl);
|
28
|
+
extern Explanation *expl_add_detail(Explanation *expl, Explanation *detail);
|
29
|
+
extern char *expl_to_s_depth(Explanation *expl, int depth);
|
30
|
+
extern char *expl_to_html(Explanation *expl);
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
32
|
+
#define expl_to_s(expl) expl_to_s_depth(expl, 0)
|
33
|
+
|
34
|
+
/***************************************************************************
|
35
|
+
*
|
36
|
+
* Highlighter
|
37
|
+
*
|
38
|
+
***************************************************************************/
|
39
|
+
|
40
|
+
typedef struct MatchRange
|
41
|
+
{
|
42
|
+
int start;
|
43
|
+
int end;
|
44
|
+
int start_offset;
|
45
|
+
int end_offset;
|
46
|
+
double score;
|
47
|
+
} MatchRange;
|
48
|
+
|
49
|
+
#define MATCH_VECTOR_INIT_CAPA 8
|
50
|
+
typedef struct MatchVector
|
51
|
+
{
|
52
|
+
int size;
|
53
|
+
int capa;
|
54
|
+
MatchRange *matches;
|
55
|
+
} MatchVector;
|
56
|
+
|
57
|
+
extern MatchVector *matchv_new();
|
58
|
+
extern MatchVector *matchv_add(MatchVector *mp, int start, int end);
|
59
|
+
extern MatchVector *matchv_sort(MatchVector *self);
|
60
|
+
extern void matchv_destroy(MatchVector *self);
|
61
|
+
extern MatchVector *matchv_compact(MatchVector *self);
|
62
|
+
extern MatchVector *matchv_compact_with_breaks(MatchVector *self);
|
36
63
|
|
37
64
|
/***************************************************************************
|
38
65
|
*
|
@@ -46,8 +73,6 @@ typedef struct Hit
|
|
46
73
|
float score;
|
47
74
|
} Hit;
|
48
75
|
|
49
|
-
extern bool hit_less_than(void *p1, void *p2);
|
50
|
-
|
51
76
|
/***************************************************************************
|
52
77
|
*
|
53
78
|
* TopDocs
|
@@ -59,9 +84,10 @@ typedef struct TopDocs
|
|
59
84
|
int total_hits;
|
60
85
|
int size;
|
61
86
|
Hit **hits;
|
87
|
+
float max_score;
|
62
88
|
} TopDocs;
|
63
89
|
|
64
|
-
extern TopDocs *
|
90
|
+
extern TopDocs *td_new(int total_hits, int size, Hit **hits, float max_score);
|
65
91
|
extern void td_destroy(TopDocs *td);
|
66
92
|
extern char *td_to_s(TopDocs *td);
|
67
93
|
|
@@ -73,22 +99,23 @@ extern char *td_to_s(TopDocs *td);
|
|
73
99
|
|
74
100
|
typedef struct Filter
|
75
101
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
102
|
+
char *name;
|
103
|
+
HashTable *cache;
|
104
|
+
BitVector *(*get_bv_i)(struct Filter *self, IndexReader *ir);
|
105
|
+
char *(*to_s)(struct Filter *self);
|
106
|
+
ulong (*hash)(struct Filter *self);
|
107
|
+
int (*eq)(struct Filter *self, struct Filter *o);
|
108
|
+
void (*destroy_i)(struct Filter *self);
|
109
|
+
int ref_cnt;
|
84
110
|
} Filter;
|
85
111
|
|
86
|
-
|
87
|
-
extern
|
88
|
-
extern BitVector *filt_get_bv(Filter *
|
89
|
-
extern void
|
90
|
-
extern
|
91
|
-
extern
|
112
|
+
#define filt_new(type) filt_create(sizeof(type), #type)
|
113
|
+
extern Filter *filt_create(size_t size, const char *name);
|
114
|
+
extern BitVector *filt_get_bv(Filter *filt, IndexReader *ir);
|
115
|
+
extern void filt_destroy_i(Filter *filt);
|
116
|
+
extern void filt_deref(Filter *filt);
|
117
|
+
extern ulong filt_hash(Filter *filt);
|
118
|
+
extern int filt_eq(Filter *filt, Filter *o);
|
92
119
|
|
93
120
|
/***************************************************************************
|
94
121
|
*
|
@@ -96,8 +123,9 @@ extern int filt_eq(Filter *self, Filter *o);
|
|
96
123
|
*
|
97
124
|
***************************************************************************/
|
98
125
|
|
99
|
-
extern Filter *
|
100
|
-
|
126
|
+
extern Filter *rfilt_new(const char *field,
|
127
|
+
const char *lower_term, const char *upper_term,
|
128
|
+
bool include_lower, bool include_upper);
|
101
129
|
|
102
130
|
/***************************************************************************
|
103
131
|
*
|
@@ -105,12 +133,8 @@ extern Filter *rfilt_create(const char *field, char *lower_term,
|
|
105
133
|
*
|
106
134
|
***************************************************************************/
|
107
135
|
|
108
|
-
|
109
|
-
|
110
|
-
Query *query;
|
111
|
-
} QueryFilter;
|
112
|
-
|
113
|
-
extern Filter *qfilt_create(Query *query);
|
136
|
+
extern Filter *qfilt_new(Query *query);
|
137
|
+
extern Filter *qfilt_new_nr(Query *query);
|
114
138
|
|
115
139
|
/***************************************************************************
|
116
140
|
*
|
@@ -120,12 +144,11 @@ extern Filter *qfilt_create(Query *query);
|
|
120
144
|
|
121
145
|
struct Weight
|
122
146
|
{
|
123
|
-
|
124
|
-
float
|
125
|
-
float
|
126
|
-
float
|
127
|
-
|
128
|
-
Query *query;
|
147
|
+
float value;
|
148
|
+
float qweight;
|
149
|
+
float qnorm;
|
150
|
+
float idf;
|
151
|
+
Query *query;
|
129
152
|
Similarity *similarity;
|
130
153
|
Query *(*get_query)(Weight *self);
|
131
154
|
float (*get_value)(Weight *self);
|
@@ -137,78 +160,26 @@ struct Weight
|
|
137
160
|
void (*destroy)(Weight *self);
|
138
161
|
};
|
139
162
|
|
140
|
-
|
163
|
+
#define w_new(type, query) w_create(sizeof(type), query)
|
164
|
+
extern Weight *w_create(size_t size, Query *query);
|
141
165
|
extern void w_destroy(Weight *self);
|
142
|
-
|
143
166
|
extern Query *w_get_query(Weight *self);
|
144
167
|
extern float w_get_value(Weight *self);
|
145
168
|
extern float w_sum_of_squared_weights(Weight *self);
|
146
169
|
extern void w_normalize(Weight *self, float normalization_factor);
|
147
170
|
|
148
|
-
/***************************************************************************
|
149
|
-
*
|
150
|
-
* TermWeight
|
151
|
-
*
|
152
|
-
***************************************************************************/
|
153
|
-
|
154
|
-
extern Weight *tw_create(Query *query, Searcher *searcher);
|
155
|
-
|
156
|
-
/***************************************************************************
|
157
|
-
*
|
158
|
-
* BooleanWeight
|
159
|
-
*
|
160
|
-
***************************************************************************/
|
161
|
-
|
162
|
-
typedef struct BooleanWeight {
|
163
|
-
Weight **weights;
|
164
|
-
int w_cnt;
|
165
|
-
} BooleanWeight;
|
166
|
-
|
167
|
-
extern Weight *bw_create(Query *query, Searcher *searcher);
|
168
|
-
|
169
|
-
/***************************************************************************
|
170
|
-
*
|
171
|
-
* PhraseWeight
|
172
|
-
*
|
173
|
-
***************************************************************************/
|
174
|
-
|
175
|
-
extern Weight *phw_create(Query *query, Searcher *searcher);
|
176
|
-
|
177
|
-
/***************************************************************************
|
178
|
-
*
|
179
|
-
* ConstantScoreWeight
|
180
|
-
*
|
181
|
-
***************************************************************************/
|
182
|
-
|
183
|
-
extern Weight *csw_create(Query *query, Searcher *searcher);
|
184
|
-
|
185
|
-
/***************************************************************************
|
186
|
-
*
|
187
|
-
* MatchAllWeight
|
188
|
-
*
|
189
|
-
***************************************************************************/
|
190
|
-
|
191
|
-
extern Weight *maw_create(Query *query, Searcher *searcher);
|
192
|
-
|
193
|
-
/***************************************************************************
|
194
|
-
*
|
195
|
-
* SpanWeight
|
196
|
-
*
|
197
|
-
***************************************************************************/
|
198
|
-
|
199
|
-
extern Weight *spanw_create(Query *query, Searcher *searcher);
|
200
|
-
|
201
171
|
/***************************************************************************
|
202
172
|
*
|
203
173
|
* Query
|
204
174
|
*
|
205
175
|
***************************************************************************/
|
206
176
|
|
207
|
-
enum QUERY_TYPE
|
177
|
+
enum QUERY_TYPE
|
178
|
+
{
|
208
179
|
TERM_QUERY,
|
180
|
+
MULTI_TERM_QUERY,
|
209
181
|
BOOLEAN_QUERY,
|
210
182
|
PHRASE_QUERY,
|
211
|
-
MULTI_PHRASE_QUERY,
|
212
183
|
CONSTANT_QUERY,
|
213
184
|
FILTERED_QUERY,
|
214
185
|
MATCH_ALL_QUERY,
|
@@ -225,57 +196,53 @@ enum QUERY_TYPE {
|
|
225
196
|
|
226
197
|
struct Query
|
227
198
|
{
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
bool destroy_all : 1;
|
199
|
+
int ref_cnt;
|
200
|
+
float boost;
|
201
|
+
Weight *weight;
|
202
|
+
Query *(*rewrite)(Query *self, IndexReader *ir);
|
203
|
+
void (*extract_terms)(Query *self, HashSet *terms);
|
204
|
+
Similarity *(*get_similarity)(Query *self, Searcher *searcher);
|
205
|
+
char *(*to_s)(Query *self, const char *field);
|
206
|
+
ulong (*hash)(Query *self);
|
207
|
+
int (*eq)(Query *self, Query *o);
|
208
|
+
void (*destroy_i)(Query *self);
|
209
|
+
Weight *(*create_weight_i)(Query *self, Searcher *searcher);
|
210
|
+
MatchVector *(*get_matchv_i)(Query *self, MatchVector *mv, TermVector *tv);
|
211
|
+
enum QUERY_TYPE type;
|
242
212
|
};
|
243
213
|
|
244
214
|
/* Internal Query Functions */
|
245
|
-
extern Query *q_create();
|
246
215
|
extern Similarity *q_get_similarity_i(Query *self, Searcher *searcher);
|
247
216
|
extern void q_destroy_i(Query *self);
|
248
217
|
extern Weight *q_create_weight_unsup(Query *self, Searcher *searcher);
|
249
218
|
|
250
|
-
|
251
219
|
extern void q_deref(Query *self);
|
220
|
+
extern const char *q_get_query_name(enum QUERY_TYPE type);
|
252
221
|
extern Weight *q_weight(Query *self, Searcher *searcher);
|
253
222
|
extern Query *q_combine(Query **queries, int q_cnt);
|
254
|
-
extern
|
223
|
+
extern ulong q_hash(Query *self);
|
255
224
|
extern int q_eq(Query *self, Query *o);
|
225
|
+
extern Query *q_create(size_t size);
|
226
|
+
#define q_new(type) q_create(sizeof(type))
|
256
227
|
|
257
228
|
/***************************************************************************
|
258
|
-
*
|
259
229
|
* TermQuery
|
260
|
-
*
|
261
230
|
***************************************************************************/
|
262
231
|
|
263
232
|
typedef struct TermQuery
|
264
233
|
{
|
265
|
-
|
234
|
+
Query super;
|
235
|
+
char *field;
|
236
|
+
char *term;
|
266
237
|
} TermQuery;
|
267
238
|
|
268
|
-
|
239
|
+
Query *tq_new(const char *field, const char *term);
|
269
240
|
|
270
241
|
/***************************************************************************
|
271
|
-
*
|
272
242
|
* BooleanQuery
|
273
|
-
*
|
274
243
|
***************************************************************************/
|
275
244
|
|
276
|
-
|
277
|
-
* BooleanClause
|
278
|
-
***************************************************************************/
|
245
|
+
/* *** BooleanClause *** */
|
279
246
|
|
280
247
|
enum BC_TYPE
|
281
248
|
{
|
@@ -284,22 +251,20 @@ enum BC_TYPE
|
|
284
251
|
BC_MUST_NOT
|
285
252
|
};
|
286
253
|
|
287
|
-
typedef struct BooleanClause
|
254
|
+
typedef struct BooleanClause
|
255
|
+
{
|
288
256
|
int ref_cnt;
|
289
257
|
Query *query;
|
290
|
-
Query *rewritten;
|
291
258
|
unsigned int occur : 4;
|
292
259
|
bool is_prohibited : 1;
|
293
260
|
bool is_required : 1;
|
294
261
|
} BooleanClause;
|
295
262
|
|
296
|
-
extern BooleanClause *
|
263
|
+
extern BooleanClause *bc_new(Query *query, enum BC_TYPE occur);
|
297
264
|
extern void bc_deref(BooleanClause *self);
|
298
|
-
extern void bc_set_occur(BooleanClause *self,
|
265
|
+
extern void bc_set_occur(BooleanClause *self, enum BC_TYPE occur);
|
299
266
|
|
300
|
-
|
301
|
-
* BooleanQuery
|
302
|
-
***************************************************************************/
|
267
|
+
/* *** BooleanQuery *** */
|
303
268
|
|
304
269
|
#define DEFAULT_MAX_CLAUSE_COUNT 1024
|
305
270
|
#define BOOLEAN_CLAUSES_START_CAPA 4
|
@@ -307,172 +272,184 @@ extern void bc_set_occur(BooleanClause *self, unsigned int occur);
|
|
307
272
|
|
308
273
|
typedef struct BooleanQuery
|
309
274
|
{
|
310
|
-
|
311
|
-
|
312
|
-
int
|
313
|
-
int
|
314
|
-
|
275
|
+
Query super;
|
276
|
+
bool coord_disabled;
|
277
|
+
int max_clause_cnt;
|
278
|
+
int clause_cnt;
|
279
|
+
int clause_capa;
|
280
|
+
float original_boost;
|
315
281
|
BooleanClause **clauses;
|
316
|
-
Similarity
|
282
|
+
Similarity *similarity;
|
317
283
|
} BooleanQuery;
|
318
284
|
|
319
|
-
extern Query *
|
285
|
+
extern Query *bq_new(bool coord_disabled);
|
320
286
|
extern BooleanClause *bq_add_query(Query *self, Query *sub_query,
|
321
|
-
|
287
|
+
enum BC_TYPE occur);
|
288
|
+
extern BooleanClause *bq_add_query_nr(Query *self, Query *sub_query,
|
289
|
+
enum BC_TYPE occur);
|
322
290
|
extern BooleanClause *bq_add_clause(Query *self, BooleanClause *bc);
|
291
|
+
extern BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc);
|
323
292
|
|
324
293
|
/***************************************************************************
|
325
|
-
*
|
326
294
|
* PhraseQuery
|
327
|
-
*
|
328
295
|
***************************************************************************/
|
329
296
|
|
330
297
|
#define PHQ_INIT_CAPA 4
|
331
298
|
typedef struct PhraseQuery
|
332
299
|
{
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
int
|
338
|
-
|
300
|
+
Query super;
|
301
|
+
int slop;
|
302
|
+
char *field;
|
303
|
+
PhrasePosition *positions;
|
304
|
+
int pos_cnt;
|
305
|
+
int pos_capa;
|
339
306
|
} PhraseQuery;
|
340
307
|
|
341
|
-
extern Query *
|
342
|
-
extern void phq_add_term(Query *self,
|
308
|
+
extern Query *phq_new(const char *field);
|
309
|
+
extern void phq_add_term(Query *self, const char *term, int pos_inc);
|
310
|
+
extern void phq_add_term_abs(Query *self, const char *term, int position);
|
311
|
+
extern void phq_append_multi_term(Query *self, const char *term);
|
343
312
|
|
344
313
|
/***************************************************************************
|
345
|
-
*
|
346
|
-
* MultiPhraseQuery
|
347
|
-
*
|
314
|
+
* MultiTermQuery
|
348
315
|
***************************************************************************/
|
349
316
|
|
350
|
-
|
317
|
+
#define MULTI_TERM_QUERY_MAX_TERMS 256
|
318
|
+
typedef struct MultiTermQuery
|
351
319
|
{
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
int t_capa;
|
358
|
-
char *field;
|
359
|
-
} MultiPhraseQuery;
|
320
|
+
Query super;
|
321
|
+
char *field;
|
322
|
+
PriorityQueue *boosted_terms;
|
323
|
+
float min_boost;
|
324
|
+
} MultiTermQuery;
|
360
325
|
|
361
|
-
extern Query *
|
362
|
-
extern void
|
326
|
+
extern void multi_tq_add_term(Query *self, const char *term);
|
327
|
+
extern void multi_tq_add_term_boost(Query *self, const char *term, float boost);
|
328
|
+
extern Query *multi_tq_new(const char *field);
|
329
|
+
extern Query *multi_tq_new_conf(const char *field, int max_terms,
|
330
|
+
float min_boost);
|
331
|
+
|
332
|
+
#define MTQMaxTerms(query) (((MTQSubQuery *)(query))->max_terms)
|
333
|
+
typedef struct MTQSubQuery
|
334
|
+
{
|
335
|
+
Query super;
|
336
|
+
int max_terms;
|
337
|
+
} MTQSubQuery;
|
363
338
|
|
364
339
|
/***************************************************************************
|
365
|
-
*
|
366
340
|
* PrefixQuery
|
367
|
-
*
|
368
341
|
***************************************************************************/
|
369
342
|
|
370
|
-
|
343
|
+
#define PREFIX_QUERY_MAX_TERMS 256
|
344
|
+
|
345
|
+
|
346
|
+
typedef struct PrefixQuery
|
347
|
+
{
|
348
|
+
MTQSubQuery super;
|
349
|
+
char *field;
|
350
|
+
char *prefix;
|
351
|
+
} PrefixQuery;
|
352
|
+
|
353
|
+
extern Query *prefixq_new(const char *field, const char *prefix);
|
371
354
|
|
372
355
|
/***************************************************************************
|
373
|
-
*
|
374
356
|
* WildCardQuery
|
375
|
-
*
|
376
357
|
***************************************************************************/
|
377
358
|
|
378
359
|
#define WILD_CHAR '?'
|
379
360
|
#define WILD_STRING '*'
|
361
|
+
#define WILD_CARD_QUERY_MAX_TERMS 256
|
362
|
+
|
363
|
+
typedef struct WildCardQuery
|
364
|
+
{
|
365
|
+
MTQSubQuery super;
|
366
|
+
char *field;
|
367
|
+
char *pattern;
|
368
|
+
} WildCardQuery;
|
369
|
+
|
380
370
|
|
381
|
-
extern Query *
|
382
|
-
extern bool wc_match(char *pattern, char *text);
|
371
|
+
extern Query *wcq_new(const char *field, const char *pattern);
|
372
|
+
extern bool wc_match(const char *pattern, const char *text);
|
383
373
|
|
384
374
|
/***************************************************************************
|
385
|
-
*
|
386
375
|
* FuzzyQuery
|
387
|
-
*
|
388
376
|
***************************************************************************/
|
389
377
|
|
390
|
-
#define DEF_MIN_SIM 0.
|
378
|
+
#define DEF_MIN_SIM 0.5f
|
391
379
|
#define DEF_PRE_LEN 0
|
380
|
+
#define DEF_MAX_TERMS 256
|
392
381
|
#define TYPICAL_LONGEST_WORD 20
|
393
382
|
|
394
383
|
typedef struct FuzzyQuery
|
395
384
|
{
|
396
|
-
|
397
|
-
char
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
int
|
385
|
+
MTQSubQuery super;
|
386
|
+
char *field;
|
387
|
+
char *term;
|
388
|
+
const char *text; /* term text after prefix */
|
389
|
+
int text_len;
|
390
|
+
int pre_len;
|
391
|
+
float min_sim;
|
392
|
+
float scale_factor;
|
393
|
+
int max_distances[TYPICAL_LONGEST_WORD];
|
394
|
+
int *da;
|
405
395
|
} FuzzyQuery;
|
406
396
|
|
407
|
-
extern Query *
|
408
|
-
extern Query *
|
397
|
+
extern Query *fuzq_new(const char *term, const char *field);
|
398
|
+
extern Query *fuzq_new_conf(const char *field, const char *term,
|
399
|
+
float min_sim, int pre_len, int max_terms);
|
409
400
|
|
410
401
|
/***************************************************************************
|
411
|
-
*
|
412
402
|
* ConstantScoreQuery
|
413
|
-
*
|
414
403
|
***************************************************************************/
|
415
404
|
|
416
|
-
|
405
|
+
typedef struct ConstantScoreQuery
|
406
|
+
{
|
407
|
+
Query super;
|
408
|
+
Filter *filter;
|
409
|
+
} ConstantScoreQuery;
|
410
|
+
|
411
|
+
extern Query *csq_new(Filter *filter);
|
412
|
+
extern Query *csq_new_nr(Filter *filter);
|
417
413
|
|
418
414
|
/***************************************************************************
|
419
|
-
*
|
420
|
-
* FilteredQueryQuery
|
421
|
-
*
|
415
|
+
* FilteredQuery
|
422
416
|
***************************************************************************/
|
423
417
|
|
424
418
|
typedef struct FilteredQuery
|
425
419
|
{
|
426
|
-
Query
|
420
|
+
Query super;
|
421
|
+
Query *query;
|
427
422
|
Filter *filter;
|
428
423
|
} FilteredQuery;
|
429
424
|
|
430
|
-
extern Query *
|
425
|
+
extern Query *fq_new(Query *query, Filter *filter);
|
431
426
|
|
432
427
|
/***************************************************************************
|
433
|
-
*
|
434
428
|
* MatchAllQuery
|
435
|
-
*
|
436
429
|
***************************************************************************/
|
437
430
|
|
438
|
-
extern Query *
|
431
|
+
extern Query *maq_new();
|
439
432
|
|
440
433
|
/***************************************************************************
|
441
|
-
*
|
442
434
|
* RangeQuery
|
443
|
-
*
|
444
435
|
***************************************************************************/
|
445
436
|
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
} Range;
|
454
|
-
|
455
|
-
extern Query *rq_create(const char *field, char *lower_term,
|
456
|
-
char *upper_term, bool include_lower, bool include_upper);
|
457
|
-
extern Query *rq_create_less(const char *field, char *upper_term,
|
458
|
-
bool include_upper);
|
459
|
-
extern Query *rq_create_more(const char *field, char *lower_term,
|
460
|
-
bool include_lower);
|
437
|
+
extern Query *rq_new(const char *field, const char *lower_term,
|
438
|
+
const char *upper_term, bool include_lower,
|
439
|
+
bool include_upper);
|
440
|
+
extern Query *rq_new_less(const char *field, const char *upper_term,
|
441
|
+
bool include_upper);
|
442
|
+
extern Query *rq_new_more(const char *field, const char *lower_term,
|
443
|
+
bool include_lower);
|
461
444
|
|
462
445
|
/***************************************************************************
|
463
|
-
*
|
464
446
|
* SpanQuery
|
465
|
-
*
|
466
|
-
***************************************************************************/
|
467
|
-
|
468
|
-
/***************************************************************************
|
469
|
-
* SpanEnum
|
470
447
|
***************************************************************************/
|
471
448
|
|
449
|
+
/* ** SpanEnum ** */
|
472
450
|
typedef struct SpanEnum SpanEnum;
|
473
451
|
struct SpanEnum
|
474
452
|
{
|
475
|
-
void *data;
|
476
453
|
Query *query;
|
477
454
|
bool (*next)(SpanEnum *self);
|
478
455
|
bool (*skip_to)(SpanEnum *self, int target_doc);
|
@@ -483,107 +460,26 @@ struct SpanEnum
|
|
483
460
|
void (*destroy)(SpanEnum *self);
|
484
461
|
};
|
485
462
|
|
486
|
-
|
487
|
-
|
488
|
-
***************************************************************************/
|
489
|
-
|
490
|
-
typedef struct SpanTermEnum SpanTermEnum;
|
491
|
-
struct SpanTermEnum
|
492
|
-
{
|
493
|
-
TermDocEnum *positions;
|
494
|
-
int position;
|
495
|
-
int doc;
|
496
|
-
int count;
|
497
|
-
int freq;
|
498
|
-
};
|
499
|
-
|
500
|
-
extern SpanEnum *spante_create(Query *query, IndexReader *ir);
|
501
|
-
|
502
|
-
/***************************************************************************
|
503
|
-
* SpanFirstEnum
|
504
|
-
***************************************************************************/
|
505
|
-
|
506
|
-
extern SpanEnum *spanfe_create(Query *query, IndexReader *ir);
|
507
|
-
|
508
|
-
/***************************************************************************
|
509
|
-
* SpanOrEnum
|
510
|
-
***************************************************************************/
|
511
|
-
|
512
|
-
typedef struct SpanOrEnum
|
513
|
-
{
|
514
|
-
PriorityQueue *queue;
|
515
|
-
SpanEnum **span_enums;
|
516
|
-
int s_cnt;
|
517
|
-
bool first_time;
|
518
|
-
} SpanOrEnum;
|
519
|
-
|
520
|
-
extern SpanEnum *spanoe_create(Query *query, IndexReader *ir);
|
521
|
-
|
522
|
-
/***************************************************************************
|
523
|
-
* SpanEnumCell
|
524
|
-
***************************************************************************/
|
525
|
-
|
526
|
-
typedef struct SpanEnumCell
|
527
|
-
{
|
528
|
-
SpanEnum *parent;
|
529
|
-
SpanEnum *se;
|
530
|
-
int index;
|
531
|
-
int length;
|
532
|
-
} SpanEnumCell;
|
533
|
-
|
534
|
-
extern SpanEnum *spanec_create(Query *parent, Query *child, int index);
|
535
|
-
|
536
|
-
/***************************************************************************
|
537
|
-
* SpanNearEnum
|
538
|
-
***************************************************************************/
|
539
|
-
|
540
|
-
typedef struct SpanNearEnum
|
541
|
-
{
|
542
|
-
SpanEnum **span_enums;
|
543
|
-
int s_cnt;
|
544
|
-
int slop;
|
545
|
-
int current;
|
546
|
-
bool first_time : 1;
|
547
|
-
bool in_order : 1;
|
548
|
-
int doc;
|
549
|
-
int start;
|
550
|
-
int end;
|
551
|
-
} SpanNearEnum;
|
552
|
-
|
553
|
-
extern SpanEnum *spanne_create(Query *query, IndexReader *ir);
|
554
|
-
|
555
|
-
/***************************************************************************
|
556
|
-
* SpanNotEnum
|
557
|
-
***************************************************************************/
|
558
|
-
|
559
|
-
typedef struct SpanNotEnum
|
463
|
+
/* ** SpanQuery ** */
|
464
|
+
typedef struct SpanQuery
|
560
465
|
{
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
}
|
566
|
-
|
567
|
-
extern SpanEnum *spanxe_create(Query *query, IndexReader *ir);
|
466
|
+
Query super;
|
467
|
+
char *field;
|
468
|
+
SpanEnum *(*get_spans)(Query *self, IndexReader *ir);
|
469
|
+
HashSet *(*get_terms)(Query *self);
|
470
|
+
} SpanQuery;
|
568
471
|
|
569
472
|
/***************************************************************************
|
570
|
-
*
|
473
|
+
* SpanTermQuery
|
571
474
|
***************************************************************************/
|
572
475
|
|
573
|
-
typedef struct
|
574
|
-
struct SpanQuery
|
476
|
+
typedef struct SpanTermQuery
|
575
477
|
{
|
576
|
-
|
577
|
-
char
|
578
|
-
|
579
|
-
|
580
|
-
};
|
478
|
+
SpanQuery super;
|
479
|
+
char *term;
|
480
|
+
} SpanTermQuery;
|
481
|
+
extern Query *spantq_new(const char *field, const char *term);
|
581
482
|
|
582
|
-
/***************************************************************************
|
583
|
-
* SpanTermQuery
|
584
|
-
***************************************************************************/
|
585
|
-
|
586
|
-
extern Query *spantq_create(Term *term);
|
587
483
|
|
588
484
|
/***************************************************************************
|
589
485
|
* SpanFirstQuery
|
@@ -591,11 +487,13 @@ extern Query *spantq_create(Term *term);
|
|
591
487
|
|
592
488
|
typedef struct SpanFirstQuery
|
593
489
|
{
|
594
|
-
|
595
|
-
|
490
|
+
SpanQuery super;
|
491
|
+
int end;
|
492
|
+
Query *match;
|
596
493
|
} SpanFirstQuery;
|
597
494
|
|
598
|
-
extern Query *
|
495
|
+
extern Query *spanfq_new(Query *match, int end);
|
496
|
+
extern Query *spanfq_new_nr(Query *match, int end);
|
599
497
|
|
600
498
|
/***************************************************************************
|
601
499
|
* SpanOrQuery
|
@@ -603,11 +501,15 @@ extern Query *spanfq_create(Query *match, int end);
|
|
603
501
|
|
604
502
|
typedef struct SpanOrQuery
|
605
503
|
{
|
606
|
-
|
607
|
-
|
504
|
+
SpanQuery super;
|
505
|
+
Query **clauses;
|
506
|
+
int c_cnt;
|
507
|
+
int c_capa;
|
608
508
|
} SpanOrQuery;
|
609
509
|
|
610
|
-
extern Query *
|
510
|
+
extern Query *spanoq_new();
|
511
|
+
extern Query *spanoq_add_clause(Query *self, Query *clause);
|
512
|
+
extern Query *spanoq_add_clause_nr(Query *self, Query *clause);
|
611
513
|
|
612
514
|
/***************************************************************************
|
613
515
|
* SpanNearQuery
|
@@ -615,15 +517,17 @@ extern Query *spanoq_create(Query **clauses, int c_cnt);
|
|
615
517
|
|
616
518
|
typedef struct SpanNearQuery
|
617
519
|
{
|
618
|
-
|
619
|
-
|
620
|
-
int
|
621
|
-
|
520
|
+
SpanQuery super;
|
521
|
+
Query **clauses;
|
522
|
+
int c_cnt;
|
523
|
+
int c_capa;
|
524
|
+
int slop;
|
525
|
+
bool in_order : 1;
|
622
526
|
} SpanNearQuery;
|
623
527
|
|
624
|
-
extern Query *
|
625
|
-
|
626
|
-
|
528
|
+
extern Query *spannq_new(int slop, bool in_order);
|
529
|
+
extern Query *spannq_add_clause(Query *self, Query *clause);
|
530
|
+
extern Query *spannq_add_clause_nr(Query *self, Query *clause);
|
627
531
|
|
628
532
|
/***************************************************************************
|
629
533
|
* SpanNotQuery
|
@@ -631,11 +535,15 @@ extern Query *spannq_create(Query **clauses, int c_cnt, int slop,
|
|
631
535
|
|
632
536
|
typedef struct SpanNotQuery
|
633
537
|
{
|
634
|
-
|
635
|
-
Query
|
538
|
+
SpanQuery super;
|
539
|
+
Query *inc;
|
540
|
+
Query *exc;
|
636
541
|
} SpanNotQuery;
|
637
542
|
|
638
|
-
extern Query *
|
543
|
+
extern Query *spanxq_new(Query *inc, Query *exc);
|
544
|
+
extern Query *spanxq_new_nr(Query *inc, Query *exc);
|
545
|
+
|
546
|
+
|
639
547
|
|
640
548
|
/***************************************************************************
|
641
549
|
*
|
@@ -643,13 +551,15 @@ extern Query *spanxq_create(Query *inc, Query *exc);
|
|
643
551
|
*
|
644
552
|
***************************************************************************/
|
645
553
|
|
646
|
-
#define SCORER_NULLIFY(mscorer)
|
554
|
+
#define SCORER_NULLIFY(mscorer) do {\
|
555
|
+
(mscorer)->destroy(mscorer);\
|
556
|
+
(mscorer) = NULL;\
|
557
|
+
} while (0)
|
647
558
|
|
648
559
|
struct Scorer
|
649
560
|
{
|
650
|
-
void *data;
|
651
561
|
Similarity *similarity;
|
652
|
-
int
|
562
|
+
int doc;
|
653
563
|
float (*score)(Scorer *self);
|
654
564
|
bool (*next)(Scorer *self);
|
655
565
|
bool (*skip_to)(Scorer *self, int doc_num);
|
@@ -657,300 +567,80 @@ struct Scorer
|
|
657
567
|
void (*destroy)(Scorer *self);
|
658
568
|
};
|
659
569
|
|
570
|
+
#define scorer_new(type, similarity) scorer_create(sizeof(type), similarity)
|
660
571
|
/* Internal Scorer Function */
|
661
572
|
extern void scorer_destroy_i(Scorer *self);
|
662
|
-
|
663
|
-
extern Scorer *scorer_create(Similarity *similarity);
|
573
|
+
extern Scorer *scorer_create(size_t size, Similarity *similarity);
|
664
574
|
extern bool scorer_less_than(void *p1, void *p2);
|
665
|
-
extern bool scorer_doc_less_than(
|
575
|
+
extern bool scorer_doc_less_than(const Scorer *s1, const Scorer *s2);
|
666
576
|
extern int scorer_doc_cmp(const void *p1, const void *p2);
|
667
577
|
|
668
|
-
/***************************************************************************
|
669
|
-
*
|
670
|
-
* TermScorer
|
671
|
-
*
|
672
|
-
***************************************************************************/
|
673
|
-
|
674
|
-
#define SCORE_CACHE_SIZE 32
|
675
|
-
#define TDE_READ_SIZE 32
|
676
|
-
|
677
|
-
typedef struct TermScorer
|
678
|
-
{
|
679
|
-
int docs[TDE_READ_SIZE];
|
680
|
-
int freqs[TDE_READ_SIZE];
|
681
|
-
int pointer;
|
682
|
-
int pointer_max;
|
683
|
-
float score_cache[SCORE_CACHE_SIZE];
|
684
|
-
Weight *weight;
|
685
|
-
TermDocEnum *tde;
|
686
|
-
uchar *norms;
|
687
|
-
float weight_value;
|
688
|
-
} TermScorer;
|
689
|
-
|
690
|
-
extern Scorer *tsc_create(Weight *weight, TermDocEnum *tde, uchar *norms);
|
691
|
-
|
692
|
-
/***************************************************************************
|
693
|
-
*
|
694
|
-
* BooleanScorer
|
695
|
-
*
|
696
|
-
***************************************************************************/
|
697
|
-
|
698
|
-
/***************************************************************************
|
699
|
-
* Coordinator
|
700
|
-
***************************************************************************/
|
701
|
-
|
702
|
-
typedef struct Coordinator
|
703
|
-
{
|
704
|
-
int max_coord;
|
705
|
-
float *coord_factors;
|
706
|
-
Similarity *similarity;
|
707
|
-
int num_matches;
|
708
|
-
} Coordinator;
|
709
|
-
|
710
|
-
/***************************************************************************
|
711
|
-
* DisjunctionSumScorer
|
712
|
-
***************************************************************************/
|
713
|
-
|
714
|
-
typedef struct DisjunctionSumScorer
|
715
|
-
{
|
716
|
-
float cum_score;
|
717
|
-
int num_matches;
|
718
|
-
int min_num_matches;
|
719
|
-
Scorer **sub_scorers;
|
720
|
-
int ss_cnt;
|
721
|
-
PriorityQueue *scorer_queue;
|
722
|
-
Coordinator *coordinator;
|
723
|
-
} DisjunctionSumScorer;
|
724
|
-
|
725
|
-
/***************************************************************************
|
726
|
-
* ConjunctionScorer
|
727
|
-
***************************************************************************/
|
728
|
-
|
729
|
-
typedef struct ConjunctionScorer
|
730
|
-
{
|
731
|
-
bool first_time : 1;
|
732
|
-
bool more : 1;
|
733
|
-
float coord;
|
734
|
-
int ss_cnt;
|
735
|
-
int ss_capa;
|
736
|
-
Scorer **sub_scorers;
|
737
|
-
int first;
|
738
|
-
int last;
|
739
|
-
Coordinator *coordinator;
|
740
|
-
int last_scored_doc;
|
741
|
-
} ConjunctionScorer;
|
742
|
-
|
743
|
-
/***************************************************************************
|
744
|
-
* SingleMatchScorer
|
745
|
-
***************************************************************************/
|
746
|
-
|
747
|
-
typedef struct SingleMatchScorer
|
748
|
-
{
|
749
|
-
Coordinator *coordinator;
|
750
|
-
Scorer *scorer;
|
751
|
-
} SingleMatchScorer;
|
752
|
-
|
753
|
-
/***************************************************************************
|
754
|
-
* ReqOptSumScorer
|
755
|
-
***************************************************************************/
|
756
|
-
|
757
|
-
typedef struct ReqOptSumScorer
|
758
|
-
{
|
759
|
-
Scorer *req_scorer;
|
760
|
-
Scorer *opt_scorer;
|
761
|
-
bool first_time_opt;
|
762
|
-
} ReqOptSumScorer;
|
763
|
-
|
764
|
-
/***************************************************************************
|
765
|
-
* ReqExclScorer
|
766
|
-
***************************************************************************/
|
767
|
-
|
768
|
-
typedef struct ReqExclScorer
|
769
|
-
{
|
770
|
-
Scorer *req_scorer;
|
771
|
-
Scorer *excl_scorer;
|
772
|
-
bool first_time;
|
773
|
-
} ReqExclScorer;
|
774
|
-
|
775
|
-
/***************************************************************************
|
776
|
-
* BooleanScorer
|
777
|
-
***************************************************************************/
|
778
|
-
|
779
|
-
typedef struct BooleanScorer
|
780
|
-
{
|
781
|
-
Scorer **required_scorers;
|
782
|
-
int rs_cnt;
|
783
|
-
int rs_capa;
|
784
|
-
Scorer **optional_scorers;
|
785
|
-
int os_cnt;
|
786
|
-
int os_capa;
|
787
|
-
Scorer **prohibited_scorers;
|
788
|
-
int ps_cnt;
|
789
|
-
int ps_capa;
|
790
|
-
Scorer *counting_sum_scorer;
|
791
|
-
Coordinator *coordinator;
|
792
|
-
} BooleanScorer;
|
793
|
-
|
794
|
-
extern Scorer *bsc_create(Similarity *similarity);
|
795
|
-
extern void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur);
|
796
|
-
|
797
|
-
/***************************************************************************
|
798
|
-
*
|
799
|
-
* PhraseScorer
|
800
|
-
*
|
801
|
-
***************************************************************************/
|
802
|
-
|
803
|
-
/***************************************************************************
|
804
|
-
* PhrasePosition
|
805
|
-
***************************************************************************/
|
806
|
-
typedef struct PhrasePosition
|
807
|
-
{
|
808
|
-
TermDocEnum *tpe;
|
809
|
-
int offset;
|
810
|
-
int count;
|
811
|
-
int doc;
|
812
|
-
int position;
|
813
|
-
} PhrasePosition;
|
814
|
-
|
815
|
-
extern PhrasePosition *pp_create(TermDocEnum *tpe, int offset);
|
816
|
-
|
817
|
-
/***************************************************************************
|
818
|
-
* PhraseScorer
|
819
|
-
***************************************************************************/
|
820
|
-
|
821
|
-
typedef struct PhraseScorer
|
822
|
-
{
|
823
|
-
float freq;
|
824
|
-
uchar *norms;
|
825
|
-
float value;
|
826
|
-
Weight *weight;
|
827
|
-
bool first_time : 1;
|
828
|
-
bool more : 1;
|
829
|
-
int pp_first;
|
830
|
-
int pp_last;
|
831
|
-
int pp_cnt;
|
832
|
-
PhrasePosition **phrase_pos;
|
833
|
-
float (*phrase_freq)(Scorer *self);
|
834
|
-
int slop;
|
835
|
-
} PhraseScorer;
|
836
|
-
|
837
|
-
extern Scorer *phsc_create(Weight *weight, TermDocEnum **term_pos_enum,
|
838
|
-
int *positions, int t_cnt, Similarity *similarity, uchar *norms);
|
839
|
-
|
840
|
-
/***************************************************************************
|
841
|
-
* ExactPhraseScorer
|
842
|
-
***************************************************************************/
|
843
|
-
|
844
|
-
extern Scorer *exact_phrase_scorer_create(Weight *weight,
|
845
|
-
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
846
|
-
Similarity *similarity, uchar *norms);
|
847
|
-
|
848
|
-
/***************************************************************************
|
849
|
-
* SloppyPhraseScorer
|
850
|
-
***************************************************************************/
|
851
|
-
|
852
|
-
extern Scorer *sloppy_phrase_scorer_create(Weight *weight,
|
853
|
-
TermDocEnum **term_pos_enum, int *positions, int t_cnt,
|
854
|
-
Similarity *similarity, int slop, uchar *norms);
|
855
|
-
|
856
|
-
/***************************************************************************
|
857
|
-
*
|
858
|
-
* ConstantScoreScorer
|
859
|
-
*
|
860
|
-
***************************************************************************/
|
861
|
-
|
862
|
-
typedef struct ConstantScoreScorer
|
863
|
-
{
|
864
|
-
BitVector *bv;
|
865
|
-
float score;
|
866
|
-
} ConstantScoreScorer;
|
867
|
-
|
868
|
-
extern Scorer *cssc_create(Weight *weight, IndexReader *ir);
|
869
|
-
|
870
|
-
|
871
|
-
/***************************************************************************
|
872
|
-
*
|
873
|
-
* MatchAllScorer
|
874
|
-
*
|
875
|
-
***************************************************************************/
|
876
|
-
|
877
|
-
typedef struct MatchAllScorer
|
878
|
-
{
|
879
|
-
IndexReader *ir;
|
880
|
-
int max_doc;
|
881
|
-
float score;
|
882
|
-
} MatchAllScorer;
|
883
|
-
|
884
|
-
extern Scorer *masc_create(Weight *weight, IndexReader *ir);
|
885
|
-
|
886
|
-
|
887
|
-
/***************************************************************************
|
888
|
-
*
|
889
|
-
* SpanScorer
|
890
|
-
*
|
891
|
-
***************************************************************************/
|
892
|
-
|
893
|
-
typedef struct SpanScorer
|
894
|
-
{
|
895
|
-
bool first_time : 1;
|
896
|
-
bool more : 1;
|
897
|
-
IndexReader *ir;
|
898
|
-
SpanEnum *spans;
|
899
|
-
Similarity *sim;
|
900
|
-
uchar *norms;
|
901
|
-
Weight *weight;
|
902
|
-
float value;
|
903
|
-
float freq;
|
904
|
-
} SpanScorer;
|
905
|
-
|
906
|
-
extern Scorer *spansc_create(Weight *weight, IndexReader *ir);
|
907
|
-
|
908
578
|
/***************************************************************************
|
909
579
|
*
|
910
580
|
* Sort
|
911
581
|
*
|
912
582
|
***************************************************************************/
|
913
583
|
|
914
|
-
enum SORT_TYPE
|
584
|
+
enum SORT_TYPE
|
585
|
+
{
|
915
586
|
SORT_TYPE_SCORE,
|
916
587
|
SORT_TYPE_DOC,
|
588
|
+
SORT_TYPE_BYTE,
|
917
589
|
SORT_TYPE_INTEGER,
|
918
590
|
SORT_TYPE_FLOAT,
|
919
591
|
SORT_TYPE_STRING,
|
920
592
|
SORT_TYPE_AUTO
|
921
593
|
};
|
922
594
|
|
595
|
+
/***************************************************************************
|
596
|
+
* Comparable
|
597
|
+
***************************************************************************/
|
598
|
+
|
599
|
+
typedef struct Comparable
|
600
|
+
{
|
601
|
+
int type;
|
602
|
+
union {
|
603
|
+
int i;
|
604
|
+
float f;
|
605
|
+
char *s;
|
606
|
+
void *p;
|
607
|
+
} val;
|
608
|
+
bool reverse : 1;
|
609
|
+
} Comparable;
|
610
|
+
|
923
611
|
/***************************************************************************
|
924
612
|
* SortField
|
925
613
|
***************************************************************************/
|
926
614
|
|
927
615
|
typedef struct SortField
|
928
616
|
{
|
929
|
-
mutex_t
|
930
|
-
char
|
931
|
-
|
932
|
-
bool
|
933
|
-
void
|
934
|
-
int
|
935
|
-
void
|
936
|
-
void
|
937
|
-
void
|
617
|
+
mutex_t mutex;
|
618
|
+
char *field;
|
619
|
+
enum SORT_TYPE type;
|
620
|
+
bool reverse : 1;
|
621
|
+
void *index;
|
622
|
+
int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
|
623
|
+
void (*get_val)(void *index_ptr, Hit *hit1, Comparable *comparable);
|
624
|
+
void *(*create_index)(int size);
|
625
|
+
void (*destroy_index)(void *p);
|
626
|
+
void (*handle_term)(void *index, TermDocEnum *tde, char *text);
|
938
627
|
} SortField;
|
939
628
|
|
940
|
-
extern SortField *
|
941
|
-
extern SortField *
|
942
|
-
extern SortField *
|
943
|
-
extern SortField *
|
944
|
-
extern SortField *
|
945
|
-
extern SortField *
|
946
|
-
extern SortField *
|
629
|
+
extern SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse);
|
630
|
+
extern SortField *sort_field_score_new(bool reverse);
|
631
|
+
extern SortField *sort_field_doc_new(bool reverse);
|
632
|
+
extern SortField *sort_field_int_new(char *field, bool reverse);
|
633
|
+
extern SortField *sort_field_byte_new(char *field, bool reverse);
|
634
|
+
extern SortField *sort_field_float_new(char *field, bool reverse);
|
635
|
+
extern SortField *sort_field_string_new(char *field, bool reverse);
|
636
|
+
extern SortField *sort_field_auto_new(char *field, bool reverse);
|
947
637
|
extern void sort_field_destroy(void *p);
|
948
638
|
extern char *sort_field_to_s(SortField *self);
|
949
639
|
|
950
|
-
extern SortField SORT_FIELD_SCORE;
|
951
|
-
extern SortField SORT_FIELD_SCORE_REV;
|
952
|
-
extern SortField SORT_FIELD_DOC;
|
953
|
-
extern SortField SORT_FIELD_DOC_REV;
|
640
|
+
extern const SortField SORT_FIELD_SCORE;
|
641
|
+
extern const SortField SORT_FIELD_SCORE_REV;
|
642
|
+
extern const SortField SORT_FIELD_DOC;
|
643
|
+
extern const SortField SORT_FIELD_DOC_REV;
|
954
644
|
|
955
645
|
/***************************************************************************
|
956
646
|
* Sort
|
@@ -959,12 +649,13 @@ extern SortField SORT_FIELD_DOC_REV;
|
|
959
649
|
typedef struct Sort
|
960
650
|
{
|
961
651
|
SortField **sort_fields;
|
962
|
-
int
|
963
|
-
int
|
652
|
+
int size;
|
653
|
+
int capa;
|
654
|
+
int start;
|
964
655
|
bool destroy_all : 1;
|
965
656
|
} Sort;
|
966
657
|
|
967
|
-
extern Sort *
|
658
|
+
extern Sort *sort_new();
|
968
659
|
extern void sort_destroy(void *p);
|
969
660
|
extern void sort_add_sort_field(Sort *self, SortField *sf);
|
970
661
|
extern void sort_clear(Sort *self);
|
@@ -978,7 +669,27 @@ extern Hit *fshq_pq_pop(PriorityQueue *pq);
|
|
978
669
|
extern void fshq_pq_down(PriorityQueue *pq);
|
979
670
|
extern void fshq_pq_insert(PriorityQueue *pq, Hit *hit);
|
980
671
|
extern void fshq_pq_destroy(PriorityQueue *pq);
|
981
|
-
extern PriorityQueue *
|
672
|
+
extern PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir);
|
673
|
+
extern Hit *fshq_pq_pop_fd(PriorityQueue *pq);
|
674
|
+
|
675
|
+
/***************************************************************************
|
676
|
+
* FieldDoc
|
677
|
+
***************************************************************************/
|
678
|
+
|
679
|
+
typedef struct FieldDoc
|
680
|
+
{
|
681
|
+
Hit hit;
|
682
|
+
int size;
|
683
|
+
Comparable comparables[];
|
684
|
+
} FieldDoc;
|
685
|
+
|
686
|
+
extern void fd_destroy(FieldDoc *fd);
|
687
|
+
|
688
|
+
/***************************************************************************
|
689
|
+
* FieldDocSortedHitQueue
|
690
|
+
***************************************************************************/
|
691
|
+
|
692
|
+
extern bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2);
|
982
693
|
|
983
694
|
/***************************************************************************
|
984
695
|
*
|
@@ -986,47 +697,91 @@ extern PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir);
|
|
986
697
|
*
|
987
698
|
***************************************************************************/
|
988
699
|
|
989
|
-
|
990
|
-
|
991
|
-
|
700
|
+
typedef bool (*filter_ft)(int doc_num, float score, Searcher *self);
|
701
|
+
|
702
|
+
struct Searcher
|
703
|
+
{
|
992
704
|
Similarity *similarity;
|
993
|
-
|
994
|
-
|
995
|
-
int *(*doc_freqs)(Searcher *self, Term **terms, int tcnt);
|
705
|
+
int (*doc_freq)(Searcher *self, const char *field,
|
706
|
+
const char *term);
|
996
707
|
Document *(*get_doc)(Searcher *self, int doc_num);
|
708
|
+
LazyDoc *(*get_lazy_doc)(Searcher *self, int doc_num);
|
997
709
|
int (*max_doc)(Searcher *self);
|
998
710
|
Weight *(*create_weight)(Searcher *self, Query *query);
|
999
711
|
TopDocs *(*search)(Searcher *self, Query *query, int first_doc,
|
1000
|
-
int num_docs, Filter *filter, Sort *sort
|
712
|
+
int num_docs, Filter *filter, Sort *sort,
|
713
|
+
filter_ft filter_func,
|
714
|
+
bool load_fields);
|
715
|
+
TopDocs *(*search_w)(Searcher *self, Weight *weight, int first_doc,
|
716
|
+
int num_docs, Filter *filter, Sort *sort,
|
717
|
+
filter_ft filter_func,
|
718
|
+
bool load_fields);
|
1001
719
|
void (*search_each)(Searcher *self, Query *query, Filter *filter,
|
1002
|
-
|
720
|
+
filter_ft filter_func,
|
721
|
+
void (*fn)(Searcher *, int, float, void *),
|
722
|
+
void *arg);
|
1003
723
|
void (*search_each_w)(Searcher *self, Weight *weight,
|
1004
|
-
Filter *filter,
|
724
|
+
Filter *filter,
|
725
|
+
filter_ft filter_func,
|
726
|
+
void (*fn)(Searcher *, int, float, void *),
|
1005
727
|
void *arg);
|
1006
728
|
Query *(*rewrite)(Searcher *self, Query *original);
|
1007
729
|
Explanation *(*explain)(Searcher *self, Query *query, int doc_num);
|
1008
730
|
Explanation *(*explain_w)(Searcher *self, Weight *weight, int doc_num);
|
731
|
+
TermVector *(*get_term_vector)(Searcher *self, const int doc_num,
|
732
|
+
const char *field);
|
1009
733
|
Similarity *(*get_similarity)(Searcher *self);
|
1010
734
|
void (*close)(Searcher *self);
|
735
|
+
void *arg; /* used to pass values to Searcher functions */
|
1011
736
|
};
|
1012
737
|
|
1013
|
-
#define
|
1014
|
-
#define
|
1015
|
-
#define
|
1016
|
-
#define
|
1017
|
-
#define
|
1018
|
-
|
1019
|
-
#define
|
1020
|
-
|
1021
|
-
#define
|
1022
|
-
|
1023
|
-
|
1024
|
-
#define
|
1025
|
-
|
1026
|
-
#define
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
738
|
+
#define searcher_doc_freq(s, t) s->doc_freq(s, t)
|
739
|
+
#define searcher_get_doc(s, dn) s->get_doc(s, dn)
|
740
|
+
#define searcher_get_lazy_doc(s, dn) s->get_lazy_doc(s, dn)
|
741
|
+
#define searcher_max_doc(s) s->max_doc(s)
|
742
|
+
#define searcher_rewrite(s, q) s->rewrite(s, q)
|
743
|
+
#define searcher_explain(s, q, dn) s->explain(s, q, dn)
|
744
|
+
#define searcher_explain_w(s, q, dn) s->explain_w(s, q, dn)
|
745
|
+
#define searcher_get_similarity(s) s->get_similarity(s)
|
746
|
+
#define searcher_close(s) s->close(s)
|
747
|
+
#define searcher_search(s, q, fd, nd, filt, sort, ff)\
|
748
|
+
s->search(s, q, fd, nd, filt, sort, ff, false)
|
749
|
+
#define searcher_search_fd(s, q, fd, nd, filt, sort, ff)\
|
750
|
+
s->search(s, q, fd, nd, filt, sort, ff, true)
|
751
|
+
#define searcher_search_each(s, q, filt, ff, fn, arg)\
|
752
|
+
s->search_each(s, q, filt, ff, fn, arg)
|
753
|
+
#define searcher_search_each_w(s, q, filt, ff, fn, arg)\
|
754
|
+
s->search_each_w(s, q, filt, ff, fn, arg)
|
755
|
+
|
756
|
+
|
757
|
+
extern MatchVector *searcher_get_match_vector(Searcher *self,
|
758
|
+
Query *query,
|
759
|
+
const int doc_num,
|
760
|
+
const char *field);
|
761
|
+
extern char **searcher_highlight(Searcher *self,
|
762
|
+
Query *query,
|
763
|
+
const int doc_num,
|
764
|
+
const char *field,
|
765
|
+
const int excerpt_len,
|
766
|
+
const int num_excerpts,
|
767
|
+
const char *pre_tag,
|
768
|
+
const char *post_tag,
|
769
|
+
const char *ellipsis);
|
770
|
+
|
771
|
+
/***************************************************************************
|
772
|
+
*
|
773
|
+
* IndexSearcher
|
774
|
+
*
|
775
|
+
***************************************************************************/
|
776
|
+
|
777
|
+
typedef struct IndexSearcher {
|
778
|
+
Searcher super;
|
779
|
+
IndexReader *ir;
|
780
|
+
bool close_ir : 1;
|
781
|
+
} IndexSearcher;
|
782
|
+
|
783
|
+
extern Searcher *isea_new(IndexReader *ir);
|
784
|
+
extern int isea_doc_freq(Searcher *self, const char *field, const char *term);
|
1030
785
|
|
1031
786
|
/***************************************************************************
|
1032
787
|
*
|
@@ -1036,15 +791,15 @@ extern Searcher *sea_create(IndexReader *ir);
|
|
1036
791
|
|
1037
792
|
typedef struct MultiSearcher
|
1038
793
|
{
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
int
|
1043
|
-
|
794
|
+
Searcher super;
|
795
|
+
int s_cnt;
|
796
|
+
Searcher **searchers;
|
797
|
+
int *starts;
|
798
|
+
int max_doc;
|
799
|
+
bool close_subs : 1;
|
1044
800
|
} MultiSearcher;
|
1045
801
|
|
1046
|
-
extern Searcher *
|
1047
|
-
bool close_subs);
|
802
|
+
extern Searcher *msea_new(Searcher **searchers, int s_cnt, bool close_subs);
|
1048
803
|
|
1049
804
|
/***************************************************************************
|
1050
805
|
*
|
@@ -1052,93 +807,39 @@ extern Searcher *msea_create(Searcher **searchers, int s_cnt,
|
|
1052
807
|
*
|
1053
808
|
***************************************************************************/
|
1054
809
|
|
1055
|
-
#define
|
810
|
+
#define QP_CONC_WORDS 2
|
811
|
+
#define QP_MAX_CLAUSES 512
|
1056
812
|
|
1057
813
|
typedef struct QParser
|
1058
814
|
{
|
1059
815
|
mutex_t mutex;
|
1060
|
-
bool or_default : 1;
|
1061
|
-
bool wild_lower : 1;
|
1062
|
-
bool clean_str : 1;
|
1063
|
-
bool handle_parse_errors : 1;
|
1064
|
-
bool allow_any_fields : 1;
|
1065
|
-
bool close_def_fields : 1;
|
1066
816
|
int def_slop;
|
817
|
+
int max_clauses;
|
818
|
+
int phq_pos_inc;
|
1067
819
|
char *qstr;
|
1068
820
|
char *qstrp;
|
1069
|
-
char buf[
|
821
|
+
char buf[QP_CONC_WORDS][MAX_WORD_SIZE];
|
1070
822
|
int buf_index;
|
823
|
+
HashTable *field_cache;
|
1071
824
|
HashSet *fields;
|
1072
825
|
HashSet *fields_buf;
|
1073
826
|
HashSet *def_fields;
|
1074
827
|
HashSet *all_fields;
|
1075
828
|
Analyzer *analyzer;
|
829
|
+
HashTable *ts_cache;
|
1076
830
|
Query *result;
|
831
|
+
bool or_default : 1;
|
832
|
+
bool wild_lower : 1;
|
833
|
+
bool clean_str : 1;
|
834
|
+
bool handle_parse_errors : 1;
|
835
|
+
bool allow_any_fields : 1;
|
836
|
+
bool close_def_fields : 1;
|
1077
837
|
} QParser;
|
1078
838
|
|
1079
|
-
extern QParser *
|
1080
|
-
|
839
|
+
extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
840
|
+
Analyzer *analyzer);
|
1081
841
|
extern void qp_destroy(QParser *self);
|
1082
842
|
extern Query *qp_parse(QParser *self, char *qstr);
|
1083
843
|
extern char *qp_clean_str(char *str);
|
1084
844
|
|
1085
|
-
/***************************************************************************
|
1086
|
-
*
|
1087
|
-
* Index
|
1088
|
-
*
|
1089
|
-
***************************************************************************/
|
1090
|
-
|
1091
|
-
typedef struct Index
|
1092
|
-
{
|
1093
|
-
mutex_t mutex;
|
1094
|
-
Store *store;
|
1095
|
-
Analyzer *analyzer;
|
1096
|
-
IndexReader *ir;
|
1097
|
-
IndexWriter *iw;
|
1098
|
-
Searcher *sea;
|
1099
|
-
QParser *qp;
|
1100
|
-
HashSet *key;
|
1101
|
-
char *id_field;
|
1102
|
-
char *def_field;
|
1103
|
-
/* for IndexWriter */
|
1104
|
-
bool use_compound_file : 1;
|
1105
|
-
bool auto_flush : 1;
|
1106
|
-
bool has_writes : 1;
|
1107
|
-
bool check_latest : 1;
|
1108
|
-
} Index;
|
1109
|
-
|
1110
|
-
extern Index *index_create(Store *store, Analyzer *analyzer,
|
1111
|
-
HashSet *def_fields, bool create);
|
1112
|
-
extern void index_destroy(Index *self);
|
1113
|
-
extern void index_flush(Index *self);
|
1114
|
-
extern int index_size(Index *self);
|
1115
|
-
extern void index_optimize(Index *self);
|
1116
|
-
extern bool index_has_del(Index *self);
|
1117
|
-
extern bool index_is_deleted(Index *self, int doc_num);
|
1118
|
-
extern void index_add_doc(Index *self, Document *doc);
|
1119
|
-
extern void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer);
|
1120
|
-
extern void index_add_string(Index *self, char *str, Analyzer *analyzer);
|
1121
|
-
extern void index_add_array(Index *self, Array *ary, Analyzer *analyzer);
|
1122
|
-
extern TopDocs *index_search_str(Index *self, char *query, int first_doc,
|
1123
|
-
int num_docs, Filter *filter, Sort *sort);
|
1124
|
-
extern Query *index_get_query(Index *self, char *qstr);
|
1125
|
-
extern Document *index_get_doc(Index *self, int doc_num);
|
1126
|
-
extern Document *index_get_doc_ts(Index *self, int doc_num);
|
1127
|
-
extern Document *index_get_doc_id(Index *self, char *id);
|
1128
|
-
extern Document *index_get_doc_term(Index *self, Term *term);
|
1129
|
-
extern void index_delete(Index *self, int doc_num);
|
1130
|
-
extern void index_delete_term(Index *self, Term *term);
|
1131
|
-
extern void index_delete_id(Index *self, char *id);
|
1132
|
-
extern void index_delete_query(Index *self, Query *q, Filter *f);
|
1133
|
-
extern void index_delete_query_str(Index *self, char *qstr, Filter *f);
|
1134
|
-
extern int index_term_id(Index *self, Term *term);
|
1135
|
-
extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
1136
|
-
extern void index_auto_flush_ir(Index *self);
|
1137
|
-
extern void index_auto_flush_iw(Index *self);
|
1138
|
-
|
1139
|
-
extern inline void ensure_searcher_open(Index *self);
|
1140
|
-
extern inline void ensure_reader_open(Index *self);
|
1141
|
-
extern inline void ensure_writer_open(Index *self);
|
1142
|
-
|
1143
845
|
#endif
|
1144
|
-
|