ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_parser.c
CHANGED
@@ -80,19 +80,28 @@
|
|
80
80
|
|
81
81
|
|
82
82
|
/* Copy the first part of user declarations. */
|
83
|
-
#line 1 "src/
|
83
|
+
#line 1 "src/q_parser.y"
|
84
84
|
|
85
85
|
#include <string.h>
|
86
|
+
#include <ctype.h>
|
87
|
+
#include <wctype.h>
|
86
88
|
#include "search.h"
|
89
|
+
#include "array.h"
|
87
90
|
|
88
91
|
typedef struct Phrase {
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
int *w_capa;
|
92
|
+
int size;
|
93
|
+
int capa;
|
94
|
+
int pos_inc;
|
95
|
+
PhrasePosition *positions;
|
94
96
|
} Phrase;
|
95
97
|
|
98
|
+
#define BCA_INIT_CAPA 4
|
99
|
+
typedef struct BCArray {
|
100
|
+
int size;
|
101
|
+
int capa;
|
102
|
+
BooleanClause **clauses;
|
103
|
+
} BCArray;
|
104
|
+
|
96
105
|
|
97
106
|
|
98
107
|
/* Enabling traces. */
|
@@ -114,17 +123,17 @@ typedef struct Phrase {
|
|
114
123
|
#endif
|
115
124
|
|
116
125
|
#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
|
117
|
-
#line
|
126
|
+
#line 23 "src/q_parser.y"
|
118
127
|
typedef union YYSTYPE {
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
128
|
+
Query *query;
|
129
|
+
BooleanClause *bcls;
|
130
|
+
BCArray *bclss;
|
131
|
+
HashSet *hashset;
|
132
|
+
Phrase *phrase;
|
133
|
+
char *str;
|
125
134
|
} YYSTYPE;
|
126
135
|
/* Line 196 of yacc.c. */
|
127
|
-
#line
|
136
|
+
#line 137 "y.tab.c"
|
128
137
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
129
138
|
# define YYSTYPE_IS_DECLARED 1
|
130
139
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -133,62 +142,58 @@ typedef union YYSTYPE {
|
|
133
142
|
|
134
143
|
|
135
144
|
/* Copy the second part of user declarations. */
|
136
|
-
#line
|
145
|
+
#line 31 "src/q_parser.y"
|
137
146
|
|
138
|
-
int yylex(YYSTYPE *lvalp, QParser *qp);
|
139
|
-
int yyerror(QParser *qp, char const *msg);
|
147
|
+
static int yylex(YYSTYPE *lvalp, QParser *qp);
|
148
|
+
static int yyerror(QParser *qp, char const *msg);
|
140
149
|
|
141
150
|
#define PHRASE_INIT_CAPA 4
|
142
|
-
Query *get_bool_q(
|
151
|
+
static Query *get_bool_q(BCArray *bca);
|
143
152
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
153
|
+
static BCArray *first_cls(BooleanClause *boolean_clause);
|
154
|
+
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
|
155
|
+
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
|
156
|
+
static BCArray *add_default_cls(QParser *qp, BCArray *bca, BooleanClause *clause);
|
148
157
|
|
149
|
-
BooleanClause *get_bool_cls(Query *q, unsigned int occur);
|
158
|
+
static BooleanClause *get_bool_cls(Query *q, unsigned int occur);
|
150
159
|
|
151
|
-
Query *get_term_q(QParser *qp, char *field, char *word);
|
152
|
-
Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
|
153
|
-
Query *get_wild_q(QParser *qp, char *field, char *pattern);
|
160
|
+
static Query *get_term_q(QParser *qp, char *field, char *word);
|
161
|
+
static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
|
162
|
+
static Query *get_wild_q(QParser *qp, char *field, char *pattern);
|
154
163
|
|
155
|
-
HashSet *first_field(QParser *qp, char *field);
|
156
|
-
HashSet *add_field(QParser *qp, char *field);
|
164
|
+
static HashSet *first_field(QParser *qp, char *field);
|
165
|
+
static HashSet *add_field(QParser *qp, char *field);
|
157
166
|
|
158
|
-
Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
|
167
|
+
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
|
159
168
|
|
160
|
-
Phrase *ph_first_word(char *word);
|
161
|
-
Phrase *ph_add_word(Phrase *self, char *word);
|
162
|
-
Phrase *ph_add_multi_word(Phrase *self, char *word);
|
169
|
+
static Phrase *ph_first_word(char *word);
|
170
|
+
static Phrase *ph_add_word(Phrase *self, char *word);
|
171
|
+
static Phrase *ph_add_multi_word(Phrase *self, char *word);
|
163
172
|
|
164
|
-
Query *get_range_q(char *field, char *from, char *to,
|
165
|
-
|
173
|
+
static Query *get_range_q(const char *field, const char *from, const char *to,
|
174
|
+
bool inc_lower, bool inc_upper);
|
166
175
|
|
167
176
|
#define FLDS(q, func) do {\
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
if (((BooleanQuery *)q->data)->clause_cnt == 0) {\
|
183
|
-
q_deref(q);\
|
184
|
-
q = NULL;\
|
177
|
+
char *field;\
|
178
|
+
if (qp->fields->size == 0) {\
|
179
|
+
q = NULL;\
|
180
|
+
} else if (qp->fields->size == 1) {\
|
181
|
+
field = (char *)qp->fields->elems[0];\
|
182
|
+
q = func;\
|
183
|
+
} else {\
|
184
|
+
int i;Query *sq;\
|
185
|
+
q = bq_new(false);\
|
186
|
+
for (i = 0; i < qp->fields->size; i++) {\
|
187
|
+
field = (char *)qp->fields->elems[i];\
|
188
|
+
sq = func;\
|
189
|
+
if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
|
190
|
+
}\
|
185
191
|
}\
|
186
|
-
}\
|
187
192
|
} while (0)
|
188
193
|
|
189
194
|
|
190
195
|
/* Line 219 of yacc.c. */
|
191
|
-
#line
|
196
|
+
#line 197 "y.tab.c"
|
192
197
|
|
193
198
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
194
199
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -427,12 +432,12 @@ static const yysigned_char yyrhs[] =
|
|
427
432
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
428
433
|
static const unsigned char yyrline[] =
|
429
434
|
{
|
430
|
-
0,
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
435
|
+
0, 95, 95, 96, 98, 99, 100, 101, 103, 104,
|
436
|
+
105, 107, 108, 110, 111, 112, 113, 114, 115, 117,
|
437
|
+
118, 119, 121, 123, 123, 125, 125, 125, 128, 129,
|
438
|
+
131, 132, 133, 134, 136, 137, 138, 139, 140, 142,
|
439
|
+
143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
|
440
|
+
153
|
436
441
|
};
|
437
442
|
#endif
|
438
443
|
|
@@ -1240,217 +1245,217 @@ yyreduce:
|
|
1240
1245
|
switch (yyn)
|
1241
1246
|
{
|
1242
1247
|
case 2:
|
1243
|
-
#line
|
1248
|
+
#line 95 "src/q_parser.y"
|
1244
1249
|
{ qp->result = (yyval.query) = NULL; }
|
1245
1250
|
break;
|
1246
1251
|
|
1247
1252
|
case 3:
|
1248
|
-
#line
|
1249
|
-
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].
|
1253
|
+
#line 96 "src/q_parser.y"
|
1254
|
+
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1250
1255
|
break;
|
1251
1256
|
|
1252
1257
|
case 4:
|
1253
|
-
#line
|
1254
|
-
{ (yyval.
|
1258
|
+
#line 98 "src/q_parser.y"
|
1259
|
+
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1255
1260
|
break;
|
1256
1261
|
|
1257
1262
|
case 5:
|
1258
|
-
#line
|
1259
|
-
{ (yyval.
|
1263
|
+
#line 99 "src/q_parser.y"
|
1264
|
+
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1260
1265
|
break;
|
1261
1266
|
|
1262
1267
|
case 6:
|
1263
|
-
#line
|
1264
|
-
{ (yyval.
|
1268
|
+
#line 100 "src/q_parser.y"
|
1269
|
+
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1265
1270
|
break;
|
1266
1271
|
|
1267
1272
|
case 7:
|
1268
|
-
#line
|
1269
|
-
{ (yyval.
|
1273
|
+
#line 101 "src/q_parser.y"
|
1274
|
+
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1270
1275
|
break;
|
1271
1276
|
|
1272
1277
|
case 8:
|
1273
|
-
#line
|
1278
|
+
#line 103 "src/q_parser.y"
|
1274
1279
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1275
1280
|
break;
|
1276
1281
|
|
1277
1282
|
case 9:
|
1278
|
-
#line
|
1283
|
+
#line 104 "src/q_parser.y"
|
1279
1284
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1280
1285
|
break;
|
1281
1286
|
|
1282
1287
|
case 10:
|
1283
|
-
#line
|
1288
|
+
#line 105 "src/q_parser.y"
|
1284
1289
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1285
1290
|
break;
|
1286
1291
|
|
1287
1292
|
case 12:
|
1288
|
-
#line
|
1293
|
+
#line 108 "src/q_parser.y"
|
1289
1294
|
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1290
1295
|
break;
|
1291
1296
|
|
1292
1297
|
case 14:
|
1293
|
-
#line
|
1294
|
-
{ (yyval.query) = get_bool_q((yyvsp[-1].
|
1298
|
+
#line 111 "src/q_parser.y"
|
1299
|
+
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1295
1300
|
break;
|
1296
1301
|
|
1297
1302
|
case 19:
|
1298
|
-
#line
|
1303
|
+
#line 117 "src/q_parser.y"
|
1299
1304
|
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1300
1305
|
break;
|
1301
1306
|
|
1302
1307
|
case 20:
|
1303
|
-
#line
|
1308
|
+
#line 118 "src/q_parser.y"
|
1304
1309
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1305
1310
|
break;
|
1306
1311
|
|
1307
1312
|
case 21:
|
1308
|
-
#line
|
1313
|
+
#line 119 "src/q_parser.y"
|
1309
1314
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1310
1315
|
break;
|
1311
1316
|
|
1312
1317
|
case 22:
|
1313
|
-
#line
|
1318
|
+
#line 121 "src/q_parser.y"
|
1314
1319
|
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1315
1320
|
break;
|
1316
1321
|
|
1317
1322
|
case 23:
|
1318
|
-
#line
|
1323
|
+
#line 123 "src/q_parser.y"
|
1319
1324
|
{ qp->fields = qp->def_fields; }
|
1320
1325
|
break;
|
1321
1326
|
|
1322
1327
|
case 24:
|
1323
|
-
#line
|
1328
|
+
#line 124 "src/q_parser.y"
|
1324
1329
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1325
1330
|
break;
|
1326
1331
|
|
1327
1332
|
case 25:
|
1328
|
-
#line
|
1333
|
+
#line 125 "src/q_parser.y"
|
1329
1334
|
{ qp->fields = qp->all_fields; }
|
1330
1335
|
break;
|
1331
1336
|
|
1332
1337
|
case 26:
|
1333
|
-
#line
|
1338
|
+
#line 125 "src/q_parser.y"
|
1334
1339
|
{qp->fields = qp->def_fields;}
|
1335
1340
|
break;
|
1336
1341
|
|
1337
1342
|
case 27:
|
1338
|
-
#line
|
1343
|
+
#line 126 "src/q_parser.y"
|
1339
1344
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1340
1345
|
break;
|
1341
1346
|
|
1342
1347
|
case 28:
|
1343
|
-
#line
|
1348
|
+
#line 128 "src/q_parser.y"
|
1344
1349
|
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1345
1350
|
break;
|
1346
1351
|
|
1347
1352
|
case 29:
|
1348
|
-
#line
|
1353
|
+
#line 129 "src/q_parser.y"
|
1349
1354
|
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1350
1355
|
break;
|
1351
1356
|
|
1352
1357
|
case 30:
|
1353
|
-
#line
|
1358
|
+
#line 131 "src/q_parser.y"
|
1354
1359
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1355
1360
|
break;
|
1356
1361
|
|
1357
1362
|
case 31:
|
1358
|
-
#line
|
1363
|
+
#line 132 "src/q_parser.y"
|
1359
1364
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1360
1365
|
break;
|
1361
1366
|
|
1362
1367
|
case 32:
|
1363
|
-
#line
|
1368
|
+
#line 133 "src/q_parser.y"
|
1364
1369
|
{ (yyval.query) = NULL; }
|
1365
1370
|
break;
|
1366
1371
|
|
1367
1372
|
case 33:
|
1368
|
-
#line
|
1373
|
+
#line 134 "src/q_parser.y"
|
1369
1374
|
{ (yyval.query) = NULL; }
|
1370
1375
|
break;
|
1371
1376
|
|
1372
1377
|
case 34:
|
1373
|
-
#line
|
1378
|
+
#line 136 "src/q_parser.y"
|
1374
1379
|
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1375
1380
|
break;
|
1376
1381
|
|
1377
1382
|
case 35:
|
1378
|
-
#line
|
1383
|
+
#line 137 "src/q_parser.y"
|
1379
1384
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1380
1385
|
break;
|
1381
1386
|
|
1382
1387
|
case 36:
|
1383
|
-
#line
|
1388
|
+
#line 138 "src/q_parser.y"
|
1384
1389
|
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1385
1390
|
break;
|
1386
1391
|
|
1387
1392
|
case 37:
|
1388
|
-
#line
|
1393
|
+
#line 139 "src/q_parser.y"
|
1389
1394
|
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1390
1395
|
break;
|
1391
1396
|
|
1392
1397
|
case 38:
|
1393
|
-
#line
|
1398
|
+
#line 140 "src/q_parser.y"
|
1394
1399
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1395
1400
|
break;
|
1396
1401
|
|
1397
1402
|
case 39:
|
1398
|
-
#line
|
1403
|
+
#line 142 "src/q_parser.y"
|
1399
1404
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1400
1405
|
break;
|
1401
1406
|
|
1402
1407
|
case 40:
|
1403
|
-
#line
|
1408
|
+
#line 143 "src/q_parser.y"
|
1404
1409
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1405
1410
|
break;
|
1406
1411
|
|
1407
1412
|
case 41:
|
1408
|
-
#line
|
1413
|
+
#line 144 "src/q_parser.y"
|
1409
1414
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1410
1415
|
break;
|
1411
1416
|
|
1412
1417
|
case 42:
|
1413
|
-
#line
|
1418
|
+
#line 145 "src/q_parser.y"
|
1414
1419
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1415
1420
|
break;
|
1416
1421
|
|
1417
1422
|
case 43:
|
1418
|
-
#line
|
1423
|
+
#line 146 "src/q_parser.y"
|
1419
1424
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
|
1420
1425
|
break;
|
1421
1426
|
|
1422
1427
|
case 44:
|
1423
|
-
#line
|
1428
|
+
#line 147 "src/q_parser.y"
|
1424
1429
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
|
1425
1430
|
break;
|
1426
1431
|
|
1427
1432
|
case 45:
|
1428
|
-
#line
|
1433
|
+
#line 148 "src/q_parser.y"
|
1429
1434
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
|
1430
1435
|
break;
|
1431
1436
|
|
1432
1437
|
case 46:
|
1433
|
-
#line
|
1438
|
+
#line 149 "src/q_parser.y"
|
1434
1439
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
|
1435
1440
|
break;
|
1436
1441
|
|
1437
1442
|
case 47:
|
1438
|
-
#line
|
1443
|
+
#line 150 "src/q_parser.y"
|
1439
1444
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
|
1440
1445
|
break;
|
1441
1446
|
|
1442
1447
|
case 48:
|
1443
|
-
#line
|
1448
|
+
#line 151 "src/q_parser.y"
|
1444
1449
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
|
1445
1450
|
break;
|
1446
1451
|
|
1447
1452
|
case 49:
|
1448
|
-
#line
|
1453
|
+
#line 152 "src/q_parser.y"
|
1449
1454
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
|
1450
1455
|
break;
|
1451
1456
|
|
1452
1457
|
case 50:
|
1453
|
-
#line
|
1458
|
+
#line 153 "src/q_parser.y"
|
1454
1459
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
|
1455
1460
|
break;
|
1456
1461
|
|
@@ -1459,7 +1464,7 @@ yyreduce:
|
|
1459
1464
|
}
|
1460
1465
|
|
1461
1466
|
/* Line 1126 of yacc.c. */
|
1462
|
-
#line
|
1467
|
+
#line 1468 "y.tab.c"
|
1463
1468
|
|
1464
1469
|
yyvsp -= yylen;
|
1465
1470
|
yyssp -= yylen;
|
@@ -1727,613 +1732,682 @@ yyreturn:
|
|
1727
1732
|
}
|
1728
1733
|
|
1729
1734
|
|
1730
|
-
#line
|
1735
|
+
#line 155 "src/q_parser.y"
|
1731
1736
|
|
1732
1737
|
|
1733
1738
|
const char *special_char = "&:()[]{}!+\"~^-|<>=*?";
|
1734
1739
|
const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
|
1735
1740
|
|
1736
|
-
int get_word(YYSTYPE *lvalp, QParser *qp)
|
1741
|
+
static int get_word(YYSTYPE *lvalp, QParser *qp)
|
1737
1742
|
{
|
1738
|
-
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
|
1749
|
-
|
1750
|
-
|
1751
|
-
|
1752
|
-
|
1753
|
-
|
1754
|
-
|
1743
|
+
bool is_wild = false;
|
1744
|
+
int len;
|
1745
|
+
char c;
|
1746
|
+
char *buf = qp->buf[qp->buf_index];
|
1747
|
+
char *bufp = buf;
|
1748
|
+
qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
|
1749
|
+
|
1750
|
+
qp->qstrp--; /* need to back up one character */
|
1751
|
+
|
1752
|
+
while (!strchr(not_word, (c=*qp->qstrp++))) {
|
1753
|
+
switch (c) {
|
1754
|
+
case '\\':
|
1755
|
+
if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
|
1756
|
+
*bufp++ = '\\';
|
1757
|
+
}
|
1758
|
+
else {
|
1759
|
+
*bufp++ = c;
|
1760
|
+
qp->qstrp++;
|
1761
|
+
}
|
1762
|
+
break;
|
1763
|
+
case '*': case '?':
|
1764
|
+
is_wild = true;
|
1765
|
+
/* fall through */
|
1766
|
+
default:
|
1767
|
+
*bufp++ = c;
|
1755
1768
|
}
|
1756
|
-
break;
|
1757
|
-
case '*': case '?':
|
1758
|
-
is_wild = true;
|
1759
|
-
default:
|
1760
|
-
*bufp++ = c;
|
1761
1769
|
}
|
1762
|
-
|
1763
|
-
|
1764
|
-
|
1765
|
-
|
1766
|
-
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
|
1771
|
-
|
1772
|
-
|
1773
|
-
|
1774
|
-
|
1775
|
-
|
1776
|
-
|
1777
|
-
|
1778
|
-
return WORD;
|
1770
|
+
qp->qstrp--;
|
1771
|
+
/* check for keywords. There are only four so we have a bit of a hack which
|
1772
|
+
* just checks for all of them. */
|
1773
|
+
*bufp = '\0';
|
1774
|
+
len = (int)(bufp - buf);
|
1775
|
+
if (len == 3) {
|
1776
|
+
if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
|
1777
|
+
if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
|
1778
|
+
if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
|
1779
|
+
}
|
1780
|
+
if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
|
1781
|
+
|
1782
|
+
/* found a word so return it. */
|
1783
|
+
lvalp->str = buf;
|
1784
|
+
if (is_wild) return WILD_STR;
|
1785
|
+
return WORD;
|
1779
1786
|
}
|
1780
1787
|
|
1781
|
-
int yylex(YYSTYPE *lvalp, QParser *qp)
|
1788
|
+
static int yylex(YYSTYPE *lvalp, QParser *qp)
|
1782
1789
|
{
|
1783
|
-
|
1790
|
+
char c, nc;
|
1784
1791
|
|
1785
|
-
|
1786
|
-
|
1787
|
-
if (c == '\0')
|
1788
|
-
return 0;
|
1792
|
+
while ((c=*qp->qstrp++) == ' ' || c == '\t') {
|
1793
|
+
}
|
1789
1794
|
|
1790
|
-
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
|
1798
|
-
|
1799
|
-
|
1800
|
-
|
1801
|
-
|
1795
|
+
if (c == '\0') return 0;
|
1796
|
+
|
1797
|
+
if (strchr(special_char, c)) { /* comment */
|
1798
|
+
nc = *qp->qstrp;
|
1799
|
+
switch (c) {
|
1800
|
+
case '-': case '!': return NOT;
|
1801
|
+
case '+': return REQ;
|
1802
|
+
case '*':
|
1803
|
+
if (nc == ':') return c;
|
1804
|
+
break;
|
1805
|
+
case '&':
|
1806
|
+
if (nc == '&') {
|
1807
|
+
qp->qstrp++;
|
1808
|
+
return AND;
|
1809
|
+
}
|
1810
|
+
break; /* Don't return single & character. Use in word. */
|
1811
|
+
case '|':
|
1812
|
+
if (nc == '|') {
|
1813
|
+
qp->qstrp++;
|
1814
|
+
return OR;
|
1815
|
+
}
|
1816
|
+
default:
|
1817
|
+
return c;
|
1802
1818
|
}
|
1803
|
-
break; /* Don't return single & character. Use in word. */
|
1804
|
-
case '|':
|
1805
|
-
if (nc == '|') {
|
1806
|
-
qp->qstrp++;
|
1807
|
-
return OR;
|
1808
|
-
}
|
1809
|
-
default:
|
1810
|
-
return c;
|
1811
1819
|
}
|
1812
|
-
}
|
1813
1820
|
|
1814
|
-
|
1821
|
+
return get_word(lvalp, qp);
|
1815
1822
|
}
|
1816
1823
|
|
1817
|
-
int yyerror(QParser *qp, char const *msg)
|
1824
|
+
static int yyerror(QParser *qp, char const *msg)
|
1818
1825
|
{
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1826
|
+
if (!qp->handle_parse_errors) {
|
1827
|
+
char buf[1024];
|
1828
|
+
buf[1023] = '\0';
|
1829
|
+
strncpy(buf, qp->qstr, 1023);
|
1830
|
+
if (qp->clean_str) {
|
1831
|
+
free(qp->qstr);
|
1832
|
+
}
|
1833
|
+
mutex_unlock(&qp->mutex);
|
1834
|
+
RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
|
1835
|
+
" was %se", buf, (char *)msg);
|
1836
|
+
}
|
1837
|
+
return 0;
|
1824
1838
|
}
|
1825
1839
|
|
1840
|
+
/* Cast a query pointer to a BooleanQuery pointer. */
#define BQ(query) ((BooleanQuery *)(query))
|
1826
1841
|
|
1827
|
-
|
1842
|
+
/*
 * Return a token stream for `field` positioned on `text`.
 *
 * Streams are kept in qp->ts_cache keyed by field name. On a hit the cached
 * stream is reset onto `text`; on a miss a new stream is obtained from the
 * analyzer and stored under a duplicated copy of the field name.
 */
static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
{
    TokenStream *cached = h_get(qp->ts_cache, field);

    if (cached) {
        cached->reset(cached, text);
        return cached;
    }

    cached = a_get_ts(qp->analyzer, field, text);
    h_set(qp->ts_cache, estrdup(field), cached);
    return cached;
}
|
1853
1854
|
|
1854
|
-
|
1855
|
-
Array *first_cls(BooleanClause *cls)
|
1855
|
+
/*
 * Look up `field` in `field_cache` and return the cached copy of the name.
 * If it is not present yet, a duplicate is created and stored as both key
 * and value before being returned.
 */
static char *get_cached_field(HashTable *field_cache, const char *field)
{
    char *interned = h_get(field_cache, field);

    if (interned) {
        return interned;
    }

    interned = estrdup(field);
    h_set(field_cache, interned, interned);
    return interned;
}
|
1861
1864
|
|
1862
|
-
|
1865
|
+
/*
 * Turn a clause array into a query and free the array wrapper.
 *
 *  - no clauses:  returns NULL (the clause storage is freed)
 *  - one clause:  returns that clause's query; the clause struct and the
 *                 clause storage are freed
 *  - otherwise:   returns a new boolean query that takes over the clause
 *                 storage, count and capacity of `bca`
 */
static Query *get_bool_q(BCArray *bca)
{
    Query *result = NULL;

    switch (bca->size) {
        case 0:
            free(bca->clauses);
            break;
        case 1: {
            BooleanClause *only = bca->clauses[0];
            result = only->query;
            free(only);
            free(bca->clauses);
            break;
        }
        default:
            result = bq_new(false);
            /* swap the query's own clause storage for the parsed one */
            free(BQ(result)->clauses);
            BQ(result)->clauses = bca->clauses;
            BQ(result)->clause_cnt = bca->size;
            BQ(result)->clause_capa = bca->capa;
            break;
    }

    free(bca);
    return result;
}
|
1876
1892
|
|
1877
|
-
|
1893
|
+
/*
 * Append `clause` to `bca`, doubling the capacity of the clause storage
 * first when it is full.
 */
static void bca_add_clause(BCArray *bca, BooleanClause *clause)
{
    if (bca->capa <= bca->size) {
        bca->capa = bca->capa << 1;
        REALLOC_N(bca->clauses, BooleanClause *, bca->capa);
    }
    bca->clauses[bca->size++] = clause;
}
|
1882
1902
|
|
1883
|
-
|
1903
|
+
/*
 * Create a clause array with the initial capacity BCA_INIT_CAPA and, when
 * `clause` is not NULL, store it as the first element.
 */
static BCArray *first_cls(BooleanClause *clause)
{
    BCArray *array = ALLOC_AND_ZERO(BCArray);

    array->clauses = ALLOC_N(BooleanClause *, BCA_INIT_CAPA);
    array->capa = BCA_INIT_CAPA;

    if (clause != NULL) {
        bca_add_clause(array, clause);
    }
    return array;
}
|
1892
1913
|
|
1893
|
-
|
1914
|
+
/*
 * Add `clause` to `bca` with AND semantics.
 *
 * A NULL clause is ignored. Otherwise, if the array currently holds exactly
 * one clause that is not prohibited, that clause is switched to BC_MUST; the
 * new clause is switched to BC_MUST as well unless it is prohibited, and is
 * then appended. Always returns `bca`.
 */
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
{
    if (!clause) {
        return bca;
    }

    if (bca->size == 1 && !bca->clauses[0]->is_prohibited) {
        bc_set_occur(bca->clauses[0], BC_MUST);
    }
    if (!clause->is_prohibited) {
        bc_set_occur(clause, BC_MUST);
    }
    bca_add_clause(bca, clause);
    return bca;
}
|
1898
1929
|
|
1899
|
-
|
1930
|
+
/*
 * Add `clause` to `bca` with OR semantics: the clause is appended unchanged.
 * A NULL clause is ignored. Always returns `bca`.
 */
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
{
    if (clause != NULL) {
        bca_add_clause(bca, clause);
    }
    return bca;
}
|
1921
1937
|
|
1922
|
-
|
1938
|
+
static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
1939
|
+
BooleanClause *clause)
|
1923
1940
|
{
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
q = NULL;
|
1930
|
-
} else {
|
1931
|
-
/* it only makes sense to find one term in a fuzzy query */
|
1932
|
-
Term *term = term_create(field, token->text);
|
1933
|
-
if (slop_str) {
|
1934
|
-
float slop;
|
1935
|
-
sscanf(slop_str, "%f", &slop);
|
1936
|
-
q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
|
1937
|
-
} else {
|
1938
|
-
q = fuzq_create(term);
|
1941
|
+
if (qp->or_default) {
|
1942
|
+
add_or_cls(bca, clause);
|
1943
|
+
}
|
1944
|
+
else {
|
1945
|
+
add_and_cls(bca, clause);
|
1939
1946
|
}
|
1940
|
-
|
1941
|
-
return q;
|
1947
|
+
return bca;
|
1942
1948
|
}
|
1943
1949
|
|
1944
|
-
|
1950
|
+
/*
 * Wrap `q` in a boolean clause with the given `occur` value.
 * Returns NULL when `q` is NULL.
 */
static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
{
    return q ? bc_new(q, occur) : NULL;
}
|
1976
1959
|
|
1977
|
-
|
1960
|
+
/*
 * Build a query for a single word in `field`.
 *
 * The word is run through the field's token stream:
 *  - no tokens:       returns NULL
 *  - one token:       returns a term query for that token
 *  - several tokens:  returns a phrase query holding all tokens, each added
 *                     with its own position increment (the first with 0)
 */
static Query *get_term_q(QParser *qp, char *field, char *word)
{
    TokenStream *ts = get_cached_ts(qp, field, word);
    Token *tk = ts_next(ts);
    Query *term_q;
    Query *phrase_q;

    if (tk == NULL) {
        return NULL;
    }

    term_q = tq_new(field, tk->text);
    tk = ts_next(ts);
    if (tk == NULL) {
        return term_q;
    }

    /* Less likely case: more than one token, so replace the term query
     * with a phrase query seeded with the term query's term. */
    phrase_q = phq_new(field);
    phq_add_term(phrase_q, ((TermQuery *)term_q)->term, 0);
    term_q->destroy_i(term_q);

    while (tk != NULL) {
        phq_add_term(phrase_q, tk->text, tk->pos_inc);
        tk = ts_next(ts);
    }
    return phrase_q;
}
|
1989
1985
|
|
1990
|
-
|
1986
|
+
/*
 * Build a fuzzy query for `word` in `field`.
 *
 * Only the first token produced for the word is used; NULL is returned when
 * there is none. The similarity defaults to DEF_MIN_SIM and is overridden by
 * the float parsed from `slop_str` when that is given.
 */
static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
{
    TokenStream *ts = get_cached_ts(qp, field, word);
    Token *tk = ts_next(ts);
    float min_sim = DEF_MIN_SIM;

    if (tk == NULL) {
        return NULL;
    }

    /* it only makes sense to find one term in a fuzzy query */
    if (slop_str) {
        sscanf(slop_str, "%f", &min_sim);
    }
    return fuzq_new_conf(field, tk->text, min_sim, DEF_PRE_LEN,
                         qp->max_clauses);
}
|
1997
2006
|
|
1998
|
-
|
2007
|
+
static char *lower_str(char *str)
|
1999
2008
|
{
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2009
|
+
const int max_len = (int)strlen(str) + 1;
|
2010
|
+
int cnt;
|
2011
|
+
wchar_t *wstr = ALLOC_N(wchar_t, max_len);
|
2012
|
+
if ((cnt = mbstowcs(wstr, str, max_len)) > 0) {
|
2013
|
+
wchar_t *w = wstr;
|
2014
|
+
while (*w) {
|
2015
|
+
*w = towlower(*w);
|
2016
|
+
w++;
|
2017
|
+
}
|
2018
|
+
wcstombs(str, wstr, max_len);
|
2004
2019
|
}
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2009
|
-
|
2010
|
-
|
2020
|
+
else {
|
2021
|
+
char *s = str;
|
2022
|
+
while (*s) {
|
2023
|
+
*s = tolower(*s);
|
2024
|
+
s++;
|
2025
|
+
}
|
2026
|
+
}
|
2027
|
+
free(wstr);
|
2028
|
+
str[max_len] = '\0';
|
2029
|
+
return str;
|
2011
2030
|
}
|
2012
2031
|
|
2013
|
-
|
2014
|
-
Phrase *ph_create()
|
2032
|
+
/*
 * Build a wildcard query for `pattern` in `field`.
 *
 * The pattern is lower-cased in place first when qp->wild_lower is set.
 * A pattern whose only wildcard is a trailing '*' is simplified to a prefix
 * query; anything else becomes a full wildcard query. The resulting query's
 * max-terms limit is set to qp->max_clauses.
 */
static Query *get_wild_q(QParser *qp, char *field, char *pattern)
{
    Query *q;
    bool is_prefix = false;
    int len;
    int i;

    if (qp->wild_lower) {
        lower_str(pattern);
    }
    /* measure after lower-casing, which rewrites the bytes in place */
    len = (int)strlen(pattern);

    /* simplify the wildcard query to a prefix query if possible. Basically a
     * prefix query is any wildcard query that has a '*' as the last character
     * and no other wildcard characters before it. The len > 0 guard and the
     * index-based scan avoid touching pattern[-1] for empty or one-character
     * patterns. */
    if (len > 0 && pattern[len - 1] == '*') {
        is_prefix = true;
        for (i = len - 2; i >= 0; i--) {
            if (pattern[i] == '*' || pattern[i] == '?') {
                is_prefix = false;
                break;
            }
        }
    }
    if (is_prefix) {
        /* chop off the '*' temporarily to create the query */
        pattern[len - 1] = 0;
        q = prefixq_new(field, pattern);
        pattern[len - 1] = '*';
    }
    else {
        q = wcq_new(field, pattern);
    }
    MTQMaxTerms(q) = qp->max_clauses;
    return q;
}
|
2024
2067
|
|
2025
|
-
|
2068
|
+
/*
 * Add `field` to the parser's current field set, provided any field is
 * allowed or the field is one of qp->all_fields. The name stored is the
 * copy held in qp->field_cache. Returns qp->fields either way.
 */
static HashSet *add_field(QParser *qp, char *field)
{
    if (!qp->allow_any_fields && !hs_exists(qp->all_fields, field)) {
        return qp->fields;
    }

    hs_add(qp->fields, get_cached_field(qp->field_cache, field));
    return qp->fields;
}
|
2036
2075
|
|
2037
|
-
|
2076
|
+
/*
 * Start a new field list: empty the parser's field buffer, make it the
 * current field set, then add `field` to it via add_field().
 */
static HashSet *first_field(QParser *qp, char *field)
{
    HashSet *buf = qp->fields_buf;

    /* empty the buffer set */
    buf->size = 0;
    h_clear(buf->ht);

    qp->fields = buf;
    return add_field(qp, field);
}
|
2053
2083
|
|
2054
|
-
|
2084
|
+
/*
 * Free a Phrase: the term array of every used position (each term is
 * released with free), the position array, and the Phrase itself.
 */
static void ph_destroy(Phrase *self)
{
    int pos = 0;

    while (pos < self->size) {
        ary_destroy(self->positions[pos].terms, &free);
        pos++;
    }
    free(self->positions);
    free(self);
}
|
2057
2093
|
|
2058
|
-
if (!word) return self; /* no point in adding NULL in multi */
|
2059
2094
|
|
2060
|
-
|
2061
|
-
|
2062
|
-
|
2063
|
-
|
2064
|
-
self->
|
2065
|
-
self->w_cnt[i]++;
|
2095
|
+
static Phrase *ph_new()
|
2096
|
+
{
|
2097
|
+
Phrase *self = ALLOC_AND_ZERO(Phrase);
|
2098
|
+
self->capa = PHRASE_INIT_CAPA;
|
2099
|
+
self->positions = ALLOC_AND_ZERO_N(PhrasePosition, PHRASE_INIT_CAPA);
|
2066
2100
|
return self;
|
2067
2101
|
}
|
2068
2102
|
|
2069
|
-
|
2103
|
+
static Phrase *ph_first_word(char *word)
|
2070
2104
|
{
|
2071
|
-
|
2072
|
-
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
2076
|
-
|
2077
|
-
Query *pq = phq_create();
|
2078
|
-
((PhraseQuery *)pq->data)->slop = slop;
|
2079
|
-
|
2080
|
-
for (i = 0; i < phrase->cnt; i++) {
|
2081
|
-
word = phrase->words[i][0];
|
2082
|
-
if (!word) {
|
2083
|
-
pos_inc++;
|
2084
|
-
} else {
|
2085
|
-
stream = a_get_ts(qp->analyzer, field, word);
|
2086
|
-
while ((token = ts_next(stream))) {
|
2087
|
-
phq_add_term(pq, term_create(field, token->text),
|
2088
|
-
token->pos_inc + pos_inc);
|
2089
|
-
pos_inc = 0;
|
2090
|
-
}
|
2105
|
+
Phrase *self = ph_new();
|
2106
|
+
if (word) { /* no point in adding NULL in start */
|
2107
|
+
self->positions[0].terms = ary_new_type_capa(char *, 1);
|
2108
|
+
ary_push(self->positions[0].terms, estrdup(word));
|
2109
|
+
self->size = 1;
|
2091
2110
|
}
|
2092
|
-
|
2093
|
-
return pq;
|
2111
|
+
return self;
|
2094
2112
|
}
|
2095
2113
|
|
2096
|
-
|
2114
|
+
/*
 * Append a new position holding a copy of `word` to the phrase.
 *
 * A NULL word adds nothing and only bumps self->pos_inc. For a real word
 * the accumulated pos_inc is stored in the new position's `pos` field and
 * then reset to 0. The position array is doubled when full.
 */
static Phrase *ph_add_word(Phrase *self, char *word)
{
    PhrasePosition *slot;

    if (!word) {
        self->pos_inc++;
        return self;
    }

    if (self->size >= self->capa) {
        self->capa <<= 1;
        REALLOC_N(self->positions, PhrasePosition, self->capa);
    }

    slot = &self->positions[self->size];
    slot->pos = self->pos_inc;
    slot->terms = ary_new_type_capa(char *, 1);
    ary_push(slot->terms, estrdup(word));

    self->size++;
    self->pos_inc = 0;
    return self;
}
|
2141
2135
|
|
2142
|
-
|
2136
|
+
/*
 * Add a copy of `word` as an alternative term at the phrase's last
 * position. A NULL word is ignored.
 *
 * If the phrase has no position yet (ph_first_word() leaves size at 0 when
 * given NULL), there is no "last position" to extend; indexing
 * positions[size - 1] would then access positions[-1]. In that case the word
 * starts a new position via ph_add_word() instead.
 */
static Phrase *ph_add_multi_word(Phrase *self, char *word)
{
    if (word) {
        if (self->size == 0) {
            return ph_add_word(self, word);
        }
        ary_push(self->positions[self->size - 1].terms, estrdup(word));
    }
    return self;
}
|
2146
|
+
|
2147
|
+
static Query *get_phrase_query(QParser *qp, char *field,
|
2148
|
+
Phrase *phrase, char *slop_str)
|
2149
|
+
{
|
2150
|
+
const int pos_cnt = phrase->size;
|
2151
|
+
Query *q = NULL;
|
2152
|
+
|
2153
|
+
if (pos_cnt == 1) {
|
2154
|
+
char **words = phrase->positions[0].terms;
|
2155
|
+
const int word_count = ary_size(words);
|
2156
|
+
if (word_count == 1) {
|
2157
|
+
q = get_term_q(qp, field, words[0]);
|
2158
|
+
}
|
2159
|
+
else {
|
2160
|
+
int i;
|
2161
|
+
q = bq_new(false);
|
2162
|
+
for (i = 0; i < word_count; i++) {
|
2163
|
+
bq_add_query_nr(q, get_term_q(qp, field, words[i]), BC_SHOULD);
|
2164
|
+
}
|
2165
|
+
}
|
2165
2166
|
}
|
2166
|
-
|
2167
|
-
|
2168
|
-
|
2169
|
-
|
2170
|
-
|
2171
|
-
|
2167
|
+
else if (pos_cnt > 1) {
|
2168
|
+
Token *token;
|
2169
|
+
TokenStream *stream;
|
2170
|
+
int i, j;
|
2171
|
+
q = phq_new(field);
|
2172
|
+
if (slop_str) {
|
2173
|
+
int slop;
|
2174
|
+
sscanf(slop_str,"%d",&slop);
|
2175
|
+
((PhraseQuery *)q)->slop = slop;
|
2176
|
+
}
|
2177
|
+
|
2178
|
+
for (i = 0; i < pos_cnt; i++) {
|
2179
|
+
int pos_inc = phrase->positions[i].pos; /* Actually holds pos_inc */
|
2180
|
+
char **words = phrase->positions[i].terms;
|
2181
|
+
const int word_count = ary_size(words);
|
2182
|
+
|
2183
|
+
if (word_count == 1) {
|
2184
|
+
stream = get_cached_ts(qp, field, words[0]);
|
2185
|
+
while ((token = ts_next(stream))) {
|
2186
|
+
phq_add_term(q, token->text, token->pos_inc + pos_inc);
|
2187
|
+
pos_inc = 0;
|
2188
|
+
}
|
2189
|
+
}
|
2190
|
+
else {
|
2191
|
+
bool added_position = false;
|
2192
|
+
|
2193
|
+
for (j = 0; j < word_count; j++) {
|
2194
|
+
stream = get_cached_ts(qp, field, words[j]);
|
2195
|
+
if ((token = ts_next(stream))) {
|
2196
|
+
if (!added_position) {
|
2197
|
+
phq_add_term(q, token->text, token->pos_inc + pos_inc);
|
2198
|
+
added_position = true;
|
2199
|
+
}
|
2200
|
+
else {
|
2201
|
+
phq_append_multi_term(q, token->text);
|
2202
|
+
}
|
2203
|
+
}
|
2204
|
+
}
|
2205
|
+
}
|
2206
|
+
}
|
2172
2207
|
}
|
2173
|
-
|
2174
|
-
|
2175
|
-
|
2208
|
+
return q;
|
2209
|
+
}
|
2210
|
+
|
2211
|
+
/*
 * Build the query for `phrase` via the FLDS macro, then destroy the phrase
 * and return the query.
 * NOTE(review): `field` is not declared in this function; presumably FLDS
 * supplies it while evaluating the expression for the parser's current
 * field(s) -- confirm against the FLDS definition.
 */
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
|
2212
|
+
{
|
2213
|
+
Query *q;
|
2214
|
+
FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
|
2215
|
+
ph_destroy(phrase);
|
2216
|
+
return q;
|
2176
2217
|
}
|
2177
2218
|
|
2178
|
-
Query *get_range_q(char *field, char *from, char *to,
|
2219
|
+
/*
 * Create a range query on `field` running from `from` to `to`; `inc_lower`
 * and `inc_upper` are passed straight through to rq_new().
 */
static Query *get_range_q(const char *field, const char *from, const char *to,
                          bool inc_lower, bool inc_upper)
{
    Query *range_q = rq_new(field, from, to, inc_lower, inc_upper);

    return range_q;
}
|
2182
2224
|
|
2183
2225
|
/*
 * Release a query parser: its field sets, the field-name and token-stream
 * caches, its reference to the analyzer, and the parser struct itself.
 */
void qp_destroy(QParser *self)
{
    /* def_fields is destroyed separately only when flagged; qp_new() clears
     * the flag when def_fields is just an alias of all_fields */
    if (self->close_def_fields) {
        hs_destroy(self->def_fields);
    }

    /* field sets */
    hs_destroy(self->all_fields);
    hs_destroy(self->fields_buf);

    /* caches */
    h_destroy(self->field_cache);
    h_destroy(self->ts_cache);

    a_deref(self->analyzer);
    free(self);
}
|
2191
2237
|
|
2192
|
-
QParser *
|
2238
|
+
/*
 * Create a query parser.
 *
 * all_fields - set of known field names; stored in the parser
 * def_fields - fields searched when a query names none; may be NULL, in
 *              which case all_fields is used as the default set
 * analyzer   - analyzer used to obtain token streams
 *
 * Defaults: OR as the default operator, lower-casing of wildcard patterns,
 * no query-string cleaning, QP_MAX_CLAUSES, parse errors raised rather than
 * handled, only known fields allowed, default slop 0.
 */
QParser *qp_new(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
{
    int idx;
    QParser *qp = ALLOC(QParser);

    /* option defaults */
    qp->or_default = true;
    qp->wild_lower = true;
    qp->clean_str = false;
    qp->max_clauses = QP_MAX_CLAUSES;
    qp->handle_parse_errors = false;
    qp->allow_any_fields = false;
    qp->def_slop = 0;

    /* field sets */
    qp->fields_buf = hs_new_str(NULL);
    qp->all_fields = all_fields;
    if (def_fields == NULL) {
        qp->def_fields = all_fields;
        qp->close_def_fields = false;
    }
    else {
        qp->def_fields = def_fields;
        /* make sure all_fields contains the default fields */
        for (idx = 0; idx < qp->def_fields->size; idx++) {
            if (!hs_exists(qp->all_fields, qp->def_fields->elems[idx])) {
                hs_add(qp->all_fields, estrdup(qp->def_fields->elems[idx]));
            }
        }
        qp->close_def_fields = true;
    }

    /* cache a private copy of every known field name, keyed by itself */
    qp->field_cache = h_new_str((free_ft)NULL, &free);
    for (idx = 0; idx < qp->all_fields->size; idx++) {
        char *name = estrdup(qp->all_fields->elems[idx]);
        h_set(qp->field_cache, name, name);
    }

    qp->fields = qp->def_fields;
    qp->analyzer = analyzer;
    qp->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
    qp->buf_index = 0;
    mutex_init(&qp->mutex, NULL);
    return qp;
}
|
2222
2277
|
|
2223
2278
|
/* these chars have meaning within phrases */
|
2224
2279
|
static const char *PHRASE_CHARS = "<>|\"";
|
2225
2280
|
|
2226
|
-
void str_insert(char *str, int len, char chr)
|
2281
|
+
/*
 * Insert `chr` at the front of `str`: the first `len` bytes are shifted one
 * place to the right, then `chr` is written at index 0. The caller must
 * provide room for len + 1 bytes; no terminator is written.
 */
static void str_insert(char *str, int len, char chr)
{
    /* regions overlap, so memmove rather than memcpy */
    memmove(&str[1], &str[0], (size_t)len);
    str[0] = chr;
}
|
2231
2286
|
|
2232
2287
|
char *qp_clean_str(char *str)
|
2233
2288
|
{
|
2234
|
-
|
2235
|
-
|
2236
|
-
|
2237
|
-
|
2238
|
-
|
2239
|
-
|
2240
|
-
|
2241
|
-
|
2242
|
-
|
2243
|
-
|
2244
|
-
|
2245
|
-
|
2246
|
-
|
2247
|
-
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
continue;
|
2254
|
-
}
|
2255
|
-
switch (b) {
|
2256
|
-
case '\\':
|
2257
|
-
if (!quote_open) /* We do our own escaping below */
|
2258
|
-
*nsp++ = b;
|
2259
|
-
break;
|
2260
|
-
case '"':
|
2261
|
-
quote_open = !quote_open;
|
2262
|
-
*nsp++ = b;
|
2263
|
-
break;
|
2264
|
-
case '(':
|
2265
|
-
if (!quote_open) {
|
2266
|
-
br_cnt++;
|
2267
|
-
} else {
|
2268
|
-
*nsp++ = '\\';
|
2269
|
-
}
|
2270
|
-
*nsp++ = b;
|
2271
|
-
break;
|
2272
|
-
case ')':
|
2273
|
-
if (!quote_open) {
|
2274
|
-
if (br_cnt == 0) {
|
2275
|
-
str_insert(new_str, (int)(nsp - new_str), '(');
|
2276
|
-
nsp++;
|
2277
|
-
} else {
|
2278
|
-
br_cnt--;
|
2279
|
-
}
|
2280
|
-
} else {
|
2281
|
-
*nsp++ = '\\';
|
2282
|
-
}
|
2283
|
-
*nsp++ = b;
|
2284
|
-
break;
|
2285
|
-
case '>':
|
2286
|
-
if (quote_open) {
|
2287
|
-
if (pb == '<') {
|
2288
|
-
/* remove the escape character */
|
2289
|
-
nsp--;
|
2290
|
-
nsp[-1] = '<';
|
2291
|
-
} else {
|
2292
|
-
*nsp++ = '\\';
|
2293
|
-
}
|
2289
|
+
int b, pb = -1;
|
2290
|
+
int br_cnt = 0;
|
2291
|
+
bool quote_open = false;
|
2292
|
+
char *sp, *nsp;
|
2293
|
+
|
2294
|
+
/* leave a little extra */
|
2295
|
+
char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
|
2296
|
+
|
2297
|
+
for (sp = str, nsp = new_str; *sp; sp++) {
|
2298
|
+
b = *sp;
|
2299
|
+
/* ignore escaped characters */
|
2300
|
+
if (pb == '\\') {
|
2301
|
+
if (quote_open && strrchr(PHRASE_CHARS, b)) {
|
2302
|
+
*nsp++ = '\\'; /* this was left off the first time through */
|
2303
|
+
}
|
2304
|
+
*nsp++ = b;
|
2305
|
+
/* \\ has escaped itself so has no power. Assign pb random char : */
|
2306
|
+
pb = ((b == '\\') ? ':' : b);
|
2307
|
+
continue;
|
2294
2308
|
}
|
2295
|
-
|
2296
|
-
|
2297
|
-
|
2298
|
-
|
2299
|
-
|
2300
|
-
|
2301
|
-
|
2309
|
+
switch (b) {
|
2310
|
+
case '\\':
|
2311
|
+
if (!quote_open) { /* We do our own escaping below */
|
2312
|
+
*nsp++ = b;
|
2313
|
+
}
|
2314
|
+
break;
|
2315
|
+
case '"':
|
2316
|
+
quote_open = !quote_open;
|
2317
|
+
*nsp++ = b;
|
2318
|
+
break;
|
2319
|
+
case '(':
|
2320
|
+
if (!quote_open) {
|
2321
|
+
br_cnt++;
|
2322
|
+
}
|
2323
|
+
else {
|
2324
|
+
*nsp++ = '\\';
|
2325
|
+
}
|
2326
|
+
*nsp++ = b;
|
2327
|
+
break;
|
2328
|
+
case ')':
|
2329
|
+
if (!quote_open) {
|
2330
|
+
if (br_cnt == 0) {
|
2331
|
+
str_insert(new_str, (int)(nsp - new_str), '(');
|
2332
|
+
nsp++;
|
2333
|
+
}
|
2334
|
+
else {
|
2335
|
+
br_cnt--;
|
2336
|
+
}
|
2337
|
+
}
|
2338
|
+
else {
|
2339
|
+
*nsp++ = '\\';
|
2340
|
+
}
|
2341
|
+
*nsp++ = b;
|
2342
|
+
break;
|
2343
|
+
case '>':
|
2344
|
+
if (quote_open) {
|
2345
|
+
if (pb == '<') {
|
2346
|
+
/* remove the escape character */
|
2347
|
+
nsp--;
|
2348
|
+
nsp[-1] = '<';
|
2349
|
+
}
|
2350
|
+
else {
|
2351
|
+
*nsp++ = '\\';
|
2352
|
+
}
|
2353
|
+
}
|
2354
|
+
*nsp++ = b;
|
2355
|
+
break;
|
2356
|
+
default:
|
2357
|
+
if (quote_open) {
|
2358
|
+
if (strrchr(special_char, b) && b != '|') {
|
2359
|
+
*nsp++ = '\\';
|
2360
|
+
}
|
2361
|
+
}
|
2362
|
+
*nsp++ = b;
|
2302
2363
|
}
|
2303
|
-
|
2364
|
+
pb = b;
|
2365
|
+
}
|
2366
|
+
if (quote_open) {
|
2367
|
+
*nsp++ = '"';
|
2304
2368
|
}
|
2305
|
-
|
2306
|
-
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2310
|
-
}
|
2311
|
-
*nsp = '\0';
|
2312
|
-
return new_str;
|
2369
|
+
for (;br_cnt > 0; br_cnt--) {
|
2370
|
+
*nsp++ = ')';
|
2371
|
+
}
|
2372
|
+
*nsp = '\0';
|
2373
|
+
return new_str;
|
2313
2374
|
}
|
2314
2375
|
|
2315
2376
|
/*
 * Build a fallback query from the raw string `str` by passing it to
 * get_term_q() through the FLDS macro. qp_parse() calls this when parsing
 * produced no result and the parser is set to handle parse errors.
 * NOTE(review): `field` is not declared in this function; presumably FLDS
 * supplies it for the parser's current field(s) -- confirm against the FLDS
 * definition.
 */
Query *qp_get_bad_query(QParser *qp, char *str)
|
2316
2377
|
{
|
2317
|
-
|
2318
|
-
|
2319
|
-
|
2378
|
+
Query *q;
|
2379
|
+
FLDS(q, get_term_q(qp, field, str));
|
2380
|
+
return q;
|
2320
2381
|
}
|
2321
2382
|
|
2322
2383
|
/*
 * Parse the query string `qstr` and return the resulting query.
 *
 * The parser mutex is held for the whole call. When self->clean_str is set
 * the string is first run through qp_clean_str() and the cleaned copy is
 * freed before returning. If parsing yields no query, the fallback from
 * qp_get_bad_query() is used when parse errors are handled; if there is
 * still no query, an empty boolean query is returned, so the result is
 * never NULL.
 */
Query *qp_parse(QParser *self, char *qstr)
{
    Query *parsed;

    mutex_lock(&self->mutex);

    /* set up the lexer input and reset the per-parse state */
    self->qstr = self->clean_str ? qp_clean_str(qstr) : qstr;
    self->qstrp = self->qstr;
    self->fields = self->def_fields;
    self->result = NULL;

    yyparse(self);

    parsed = self->result;
    if (parsed == NULL && self->handle_parse_errors) {
        parsed = qp_get_bad_query(self, self->qstr);
    }
    if (parsed == NULL) {
        parsed = bq_new(false);
    }

    if (self->clean_str) {
        free(self->qstr);
    }

    mutex_unlock(&self->mutex);
    return parsed;
}
|
2338
2412
|
|
2339
2413
|
|