ferret 0.9.6 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_parser.c
CHANGED
@@ -80,19 +80,28 @@
|
|
80
80
|
|
81
81
|
|
82
82
|
/* Copy the first part of user declarations. */
|
83
|
-
#line 1 "src/
|
83
|
+
#line 1 "src/q_parser.y"
|
84
84
|
|
85
85
|
#include <string.h>
|
86
|
+
#include <ctype.h>
|
87
|
+
#include <wctype.h>
|
86
88
|
#include "search.h"
|
89
|
+
#include "array.h"
|
87
90
|
|
88
91
|
typedef struct Phrase {
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
int *w_capa;
|
92
|
+
int size;
|
93
|
+
int capa;
|
94
|
+
int pos_inc;
|
95
|
+
PhrasePosition *positions;
|
94
96
|
} Phrase;
|
95
97
|
|
98
|
+
#define BCA_INIT_CAPA 4
|
99
|
+
typedef struct BCArray {
|
100
|
+
int size;
|
101
|
+
int capa;
|
102
|
+
BooleanClause **clauses;
|
103
|
+
} BCArray;
|
104
|
+
|
96
105
|
|
97
106
|
|
98
107
|
/* Enabling traces. */
|
@@ -114,17 +123,17 @@ typedef struct Phrase {
|
|
114
123
|
#endif
|
115
124
|
|
116
125
|
#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
|
117
|
-
#line
|
126
|
+
#line 23 "src/q_parser.y"
|
118
127
|
typedef union YYSTYPE {
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
128
|
+
Query *query;
|
129
|
+
BooleanClause *bcls;
|
130
|
+
BCArray *bclss;
|
131
|
+
HashSet *hashset;
|
132
|
+
Phrase *phrase;
|
133
|
+
char *str;
|
125
134
|
} YYSTYPE;
|
126
135
|
/* Line 196 of yacc.c. */
|
127
|
-
#line
|
136
|
+
#line 137 "y.tab.c"
|
128
137
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
129
138
|
# define YYSTYPE_IS_DECLARED 1
|
130
139
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -133,62 +142,58 @@ typedef union YYSTYPE {
|
|
133
142
|
|
134
143
|
|
135
144
|
/* Copy the second part of user declarations. */
|
136
|
-
#line
|
145
|
+
#line 31 "src/q_parser.y"
|
137
146
|
|
138
|
-
int yylex(YYSTYPE *lvalp, QParser *qp);
|
139
|
-
int yyerror(QParser *qp, char const *msg);
|
147
|
+
static int yylex(YYSTYPE *lvalp, QParser *qp);
|
148
|
+
static int yyerror(QParser *qp, char const *msg);
|
140
149
|
|
141
150
|
#define PHRASE_INIT_CAPA 4
|
142
|
-
Query *get_bool_q(
|
151
|
+
static Query *get_bool_q(BCArray *bca);
|
143
152
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
153
|
+
static BCArray *first_cls(BooleanClause *boolean_clause);
|
154
|
+
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
|
155
|
+
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
|
156
|
+
static BCArray *add_default_cls(QParser *qp, BCArray *bca, BooleanClause *clause);
|
148
157
|
|
149
|
-
BooleanClause *get_bool_cls(Query *q, unsigned int occur);
|
158
|
+
static BooleanClause *get_bool_cls(Query *q, unsigned int occur);
|
150
159
|
|
151
|
-
Query *get_term_q(QParser *qp, char *field, char *word);
|
152
|
-
Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
|
153
|
-
Query *get_wild_q(QParser *qp, char *field, char *pattern);
|
160
|
+
static Query *get_term_q(QParser *qp, char *field, char *word);
|
161
|
+
static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
|
162
|
+
static Query *get_wild_q(QParser *qp, char *field, char *pattern);
|
154
163
|
|
155
|
-
HashSet *first_field(QParser *qp, char *field);
|
156
|
-
HashSet *add_field(QParser *qp, char *field);
|
164
|
+
static HashSet *first_field(QParser *qp, char *field);
|
165
|
+
static HashSet *add_field(QParser *qp, char *field);
|
157
166
|
|
158
|
-
Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
|
167
|
+
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
|
159
168
|
|
160
|
-
Phrase *ph_first_word(char *word);
|
161
|
-
Phrase *ph_add_word(Phrase *self, char *word);
|
162
|
-
Phrase *ph_add_multi_word(Phrase *self, char *word);
|
169
|
+
static Phrase *ph_first_word(char *word);
|
170
|
+
static Phrase *ph_add_word(Phrase *self, char *word);
|
171
|
+
static Phrase *ph_add_multi_word(Phrase *self, char *word);
|
163
172
|
|
164
|
-
Query *get_range_q(char *field, char *from, char *to,
|
165
|
-
|
173
|
+
static Query *get_range_q(const char *field, const char *from, const char *to,
|
174
|
+
bool inc_lower, bool inc_upper);
|
166
175
|
|
167
176
|
#define FLDS(q, func) do {\
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
if (((BooleanQuery *)q->data)->clause_cnt == 0) {\
|
183
|
-
q_deref(q);\
|
184
|
-
q = NULL;\
|
177
|
+
char *field;\
|
178
|
+
if (qp->fields->size == 0) {\
|
179
|
+
q = NULL;\
|
180
|
+
} else if (qp->fields->size == 1) {\
|
181
|
+
field = (char *)qp->fields->elems[0];\
|
182
|
+
q = func;\
|
183
|
+
} else {\
|
184
|
+
int i;Query *sq;\
|
185
|
+
q = bq_new(false);\
|
186
|
+
for (i = 0; i < qp->fields->size; i++) {\
|
187
|
+
field = (char *)qp->fields->elems[i];\
|
188
|
+
sq = func;\
|
189
|
+
if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
|
190
|
+
}\
|
185
191
|
}\
|
186
|
-
}\
|
187
192
|
} while (0)
|
188
193
|
|
189
194
|
|
190
195
|
/* Line 219 of yacc.c. */
|
191
|
-
#line
|
196
|
+
#line 197 "y.tab.c"
|
192
197
|
|
193
198
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
194
199
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -427,12 +432,12 @@ static const yysigned_char yyrhs[] =
|
|
427
432
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
428
433
|
static const unsigned char yyrline[] =
|
429
434
|
{
|
430
|
-
0,
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
435
|
+
0, 95, 95, 96, 98, 99, 100, 101, 103, 104,
|
436
|
+
105, 107, 108, 110, 111, 112, 113, 114, 115, 117,
|
437
|
+
118, 119, 121, 123, 123, 125, 125, 125, 128, 129,
|
438
|
+
131, 132, 133, 134, 136, 137, 138, 139, 140, 142,
|
439
|
+
143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
|
440
|
+
153
|
436
441
|
};
|
437
442
|
#endif
|
438
443
|
|
@@ -1240,217 +1245,217 @@ yyreduce:
|
|
1240
1245
|
switch (yyn)
|
1241
1246
|
{
|
1242
1247
|
case 2:
|
1243
|
-
#line
|
1248
|
+
#line 95 "src/q_parser.y"
|
1244
1249
|
{ qp->result = (yyval.query) = NULL; }
|
1245
1250
|
break;
|
1246
1251
|
|
1247
1252
|
case 3:
|
1248
|
-
#line
|
1249
|
-
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].
|
1253
|
+
#line 96 "src/q_parser.y"
|
1254
|
+
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1250
1255
|
break;
|
1251
1256
|
|
1252
1257
|
case 4:
|
1253
|
-
#line
|
1254
|
-
{ (yyval.
|
1258
|
+
#line 98 "src/q_parser.y"
|
1259
|
+
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1255
1260
|
break;
|
1256
1261
|
|
1257
1262
|
case 5:
|
1258
|
-
#line
|
1259
|
-
{ (yyval.
|
1263
|
+
#line 99 "src/q_parser.y"
|
1264
|
+
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1260
1265
|
break;
|
1261
1266
|
|
1262
1267
|
case 6:
|
1263
|
-
#line
|
1264
|
-
{ (yyval.
|
1268
|
+
#line 100 "src/q_parser.y"
|
1269
|
+
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1265
1270
|
break;
|
1266
1271
|
|
1267
1272
|
case 7:
|
1268
|
-
#line
|
1269
|
-
{ (yyval.
|
1273
|
+
#line 101 "src/q_parser.y"
|
1274
|
+
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1270
1275
|
break;
|
1271
1276
|
|
1272
1277
|
case 8:
|
1273
|
-
#line
|
1278
|
+
#line 103 "src/q_parser.y"
|
1274
1279
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1275
1280
|
break;
|
1276
1281
|
|
1277
1282
|
case 9:
|
1278
|
-
#line
|
1283
|
+
#line 104 "src/q_parser.y"
|
1279
1284
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1280
1285
|
break;
|
1281
1286
|
|
1282
1287
|
case 10:
|
1283
|
-
#line
|
1288
|
+
#line 105 "src/q_parser.y"
|
1284
1289
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1285
1290
|
break;
|
1286
1291
|
|
1287
1292
|
case 12:
|
1288
|
-
#line
|
1293
|
+
#line 108 "src/q_parser.y"
|
1289
1294
|
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1290
1295
|
break;
|
1291
1296
|
|
1292
1297
|
case 14:
|
1293
|
-
#line
|
1294
|
-
{ (yyval.query) = get_bool_q((yyvsp[-1].
|
1298
|
+
#line 111 "src/q_parser.y"
|
1299
|
+
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1295
1300
|
break;
|
1296
1301
|
|
1297
1302
|
case 19:
|
1298
|
-
#line
|
1303
|
+
#line 117 "src/q_parser.y"
|
1299
1304
|
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1300
1305
|
break;
|
1301
1306
|
|
1302
1307
|
case 20:
|
1303
|
-
#line
|
1308
|
+
#line 118 "src/q_parser.y"
|
1304
1309
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1305
1310
|
break;
|
1306
1311
|
|
1307
1312
|
case 21:
|
1308
|
-
#line
|
1313
|
+
#line 119 "src/q_parser.y"
|
1309
1314
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1310
1315
|
break;
|
1311
1316
|
|
1312
1317
|
case 22:
|
1313
|
-
#line
|
1318
|
+
#line 121 "src/q_parser.y"
|
1314
1319
|
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1315
1320
|
break;
|
1316
1321
|
|
1317
1322
|
case 23:
|
1318
|
-
#line
|
1323
|
+
#line 123 "src/q_parser.y"
|
1319
1324
|
{ qp->fields = qp->def_fields; }
|
1320
1325
|
break;
|
1321
1326
|
|
1322
1327
|
case 24:
|
1323
|
-
#line
|
1328
|
+
#line 124 "src/q_parser.y"
|
1324
1329
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1325
1330
|
break;
|
1326
1331
|
|
1327
1332
|
case 25:
|
1328
|
-
#line
|
1333
|
+
#line 125 "src/q_parser.y"
|
1329
1334
|
{ qp->fields = qp->all_fields; }
|
1330
1335
|
break;
|
1331
1336
|
|
1332
1337
|
case 26:
|
1333
|
-
#line
|
1338
|
+
#line 125 "src/q_parser.y"
|
1334
1339
|
{qp->fields = qp->def_fields;}
|
1335
1340
|
break;
|
1336
1341
|
|
1337
1342
|
case 27:
|
1338
|
-
#line
|
1343
|
+
#line 126 "src/q_parser.y"
|
1339
1344
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1340
1345
|
break;
|
1341
1346
|
|
1342
1347
|
case 28:
|
1343
|
-
#line
|
1348
|
+
#line 128 "src/q_parser.y"
|
1344
1349
|
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1345
1350
|
break;
|
1346
1351
|
|
1347
1352
|
case 29:
|
1348
|
-
#line
|
1353
|
+
#line 129 "src/q_parser.y"
|
1349
1354
|
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1350
1355
|
break;
|
1351
1356
|
|
1352
1357
|
case 30:
|
1353
|
-
#line
|
1358
|
+
#line 131 "src/q_parser.y"
|
1354
1359
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1355
1360
|
break;
|
1356
1361
|
|
1357
1362
|
case 31:
|
1358
|
-
#line
|
1363
|
+
#line 132 "src/q_parser.y"
|
1359
1364
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1360
1365
|
break;
|
1361
1366
|
|
1362
1367
|
case 32:
|
1363
|
-
#line
|
1368
|
+
#line 133 "src/q_parser.y"
|
1364
1369
|
{ (yyval.query) = NULL; }
|
1365
1370
|
break;
|
1366
1371
|
|
1367
1372
|
case 33:
|
1368
|
-
#line
|
1373
|
+
#line 134 "src/q_parser.y"
|
1369
1374
|
{ (yyval.query) = NULL; }
|
1370
1375
|
break;
|
1371
1376
|
|
1372
1377
|
case 34:
|
1373
|
-
#line
|
1378
|
+
#line 136 "src/q_parser.y"
|
1374
1379
|
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1375
1380
|
break;
|
1376
1381
|
|
1377
1382
|
case 35:
|
1378
|
-
#line
|
1383
|
+
#line 137 "src/q_parser.y"
|
1379
1384
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1380
1385
|
break;
|
1381
1386
|
|
1382
1387
|
case 36:
|
1383
|
-
#line
|
1388
|
+
#line 138 "src/q_parser.y"
|
1384
1389
|
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1385
1390
|
break;
|
1386
1391
|
|
1387
1392
|
case 37:
|
1388
|
-
#line
|
1393
|
+
#line 139 "src/q_parser.y"
|
1389
1394
|
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1390
1395
|
break;
|
1391
1396
|
|
1392
1397
|
case 38:
|
1393
|
-
#line
|
1398
|
+
#line 140 "src/q_parser.y"
|
1394
1399
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1395
1400
|
break;
|
1396
1401
|
|
1397
1402
|
case 39:
|
1398
|
-
#line
|
1403
|
+
#line 142 "src/q_parser.y"
|
1399
1404
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1400
1405
|
break;
|
1401
1406
|
|
1402
1407
|
case 40:
|
1403
|
-
#line
|
1408
|
+
#line 143 "src/q_parser.y"
|
1404
1409
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1405
1410
|
break;
|
1406
1411
|
|
1407
1412
|
case 41:
|
1408
|
-
#line
|
1413
|
+
#line 144 "src/q_parser.y"
|
1409
1414
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1410
1415
|
break;
|
1411
1416
|
|
1412
1417
|
case 42:
|
1413
|
-
#line
|
1418
|
+
#line 145 "src/q_parser.y"
|
1414
1419
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1415
1420
|
break;
|
1416
1421
|
|
1417
1422
|
case 43:
|
1418
|
-
#line
|
1423
|
+
#line 146 "src/q_parser.y"
|
1419
1424
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
|
1420
1425
|
break;
|
1421
1426
|
|
1422
1427
|
case 44:
|
1423
|
-
#line
|
1428
|
+
#line 147 "src/q_parser.y"
|
1424
1429
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
|
1425
1430
|
break;
|
1426
1431
|
|
1427
1432
|
case 45:
|
1428
|
-
#line
|
1433
|
+
#line 148 "src/q_parser.y"
|
1429
1434
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
|
1430
1435
|
break;
|
1431
1436
|
|
1432
1437
|
case 46:
|
1433
|
-
#line
|
1438
|
+
#line 149 "src/q_parser.y"
|
1434
1439
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
|
1435
1440
|
break;
|
1436
1441
|
|
1437
1442
|
case 47:
|
1438
|
-
#line
|
1443
|
+
#line 150 "src/q_parser.y"
|
1439
1444
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
|
1440
1445
|
break;
|
1441
1446
|
|
1442
1447
|
case 48:
|
1443
|
-
#line
|
1448
|
+
#line 151 "src/q_parser.y"
|
1444
1449
|
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
|
1445
1450
|
break;
|
1446
1451
|
|
1447
1452
|
case 49:
|
1448
|
-
#line
|
1453
|
+
#line 152 "src/q_parser.y"
|
1449
1454
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
|
1450
1455
|
break;
|
1451
1456
|
|
1452
1457
|
case 50:
|
1453
|
-
#line
|
1458
|
+
#line 153 "src/q_parser.y"
|
1454
1459
|
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
|
1455
1460
|
break;
|
1456
1461
|
|
@@ -1459,7 +1464,7 @@ yyreduce:
|
|
1459
1464
|
}
|
1460
1465
|
|
1461
1466
|
/* Line 1126 of yacc.c. */
|
1462
|
-
#line
|
1467
|
+
#line 1468 "y.tab.c"
|
1463
1468
|
|
1464
1469
|
yyvsp -= yylen;
|
1465
1470
|
yyssp -= yylen;
|
@@ -1727,613 +1732,682 @@ yyreturn:
|
|
1727
1732
|
}
|
1728
1733
|
|
1729
1734
|
|
1730
|
-
#line
|
1735
|
+
#line 155 "src/q_parser.y"
|
1731
1736
|
|
1732
1737
|
|
1733
1738
|
const char *special_char = "&:()[]{}!+\"~^-|<>=*?";
|
1734
1739
|
const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
|
1735
1740
|
|
1736
|
-
int get_word(YYSTYPE *lvalp, QParser *qp)
|
1741
|
+
static int get_word(YYSTYPE *lvalp, QParser *qp)
|
1737
1742
|
{
|
1738
|
-
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
|
1749
|
-
|
1750
|
-
|
1751
|
-
|
1752
|
-
|
1753
|
-
|
1754
|
-
|
1743
|
+
bool is_wild = false;
|
1744
|
+
int len;
|
1745
|
+
char c;
|
1746
|
+
char *buf = qp->buf[qp->buf_index];
|
1747
|
+
char *bufp = buf;
|
1748
|
+
qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
|
1749
|
+
|
1750
|
+
qp->qstrp--; /* need to back up one character */
|
1751
|
+
|
1752
|
+
while (!strchr(not_word, (c=*qp->qstrp++))) {
|
1753
|
+
switch (c) {
|
1754
|
+
case '\\':
|
1755
|
+
if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
|
1756
|
+
*bufp++ = '\\';
|
1757
|
+
}
|
1758
|
+
else {
|
1759
|
+
*bufp++ = c;
|
1760
|
+
qp->qstrp++;
|
1761
|
+
}
|
1762
|
+
break;
|
1763
|
+
case '*': case '?':
|
1764
|
+
is_wild = true;
|
1765
|
+
/* fall through */
|
1766
|
+
default:
|
1767
|
+
*bufp++ = c;
|
1755
1768
|
}
|
1756
|
-
break;
|
1757
|
-
case '*': case '?':
|
1758
|
-
is_wild = true;
|
1759
|
-
default:
|
1760
|
-
*bufp++ = c;
|
1761
1769
|
}
|
1762
|
-
|
1763
|
-
|
1764
|
-
|
1765
|
-
|
1766
|
-
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
|
1771
|
-
|
1772
|
-
|
1773
|
-
|
1774
|
-
|
1775
|
-
|
1776
|
-
|
1777
|
-
|
1778
|
-
return WORD;
|
1770
|
+
qp->qstrp--;
|
1771
|
+
/* check for keywords. There are only four so we have a bit of a hack which
|
1772
|
+
* just checks for all of them. */
|
1773
|
+
*bufp = '\0';
|
1774
|
+
len = (int)(bufp - buf);
|
1775
|
+
if (len == 3) {
|
1776
|
+
if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
|
1777
|
+
if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
|
1778
|
+
if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
|
1779
|
+
}
|
1780
|
+
if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
|
1781
|
+
|
1782
|
+
/* found a word so return it. */
|
1783
|
+
lvalp->str = buf;
|
1784
|
+
if (is_wild) return WILD_STR;
|
1785
|
+
return WORD;
|
1779
1786
|
}
|
1780
1787
|
|
1781
|
-
int yylex(YYSTYPE *lvalp, QParser *qp)
|
1788
|
+
static int yylex(YYSTYPE *lvalp, QParser *qp)
|
1782
1789
|
{
|
1783
|
-
|
1790
|
+
char c, nc;
|
1784
1791
|
|
1785
|
-
|
1786
|
-
|
1787
|
-
if (c == '\0')
|
1788
|
-
return 0;
|
1792
|
+
while ((c=*qp->qstrp++) == ' ' || c == '\t') {
|
1793
|
+
}
|
1789
1794
|
|
1790
|
-
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
|
1798
|
-
|
1799
|
-
|
1800
|
-
|
1801
|
-
|
1795
|
+
if (c == '\0') return 0;
|
1796
|
+
|
1797
|
+
if (strchr(special_char, c)) { /* comment */
|
1798
|
+
nc = *qp->qstrp;
|
1799
|
+
switch (c) {
|
1800
|
+
case '-': case '!': return NOT;
|
1801
|
+
case '+': return REQ;
|
1802
|
+
case '*':
|
1803
|
+
if (nc == ':') return c;
|
1804
|
+
break;
|
1805
|
+
case '&':
|
1806
|
+
if (nc == '&') {
|
1807
|
+
qp->qstrp++;
|
1808
|
+
return AND;
|
1809
|
+
}
|
1810
|
+
break; /* Don't return single & character. Use in word. */
|
1811
|
+
case '|':
|
1812
|
+
if (nc == '|') {
|
1813
|
+
qp->qstrp++;
|
1814
|
+
return OR;
|
1815
|
+
}
|
1816
|
+
default:
|
1817
|
+
return c;
|
1802
1818
|
}
|
1803
|
-
break; /* Don't return single & character. Use in word. */
|
1804
|
-
case '|':
|
1805
|
-
if (nc == '|') {
|
1806
|
-
qp->qstrp++;
|
1807
|
-
return OR;
|
1808
|
-
}
|
1809
|
-
default:
|
1810
|
-
return c;
|
1811
1819
|
}
|
1812
|
-
}
|
1813
1820
|
|
1814
|
-
|
1821
|
+
return get_word(lvalp, qp);
|
1815
1822
|
}
|
1816
1823
|
|
1817
|
-
int yyerror(QParser *qp, char const *msg)
|
1824
|
+
static int yyerror(QParser *qp, char const *msg)
|
1818
1825
|
{
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1826
|
+
if (!qp->handle_parse_errors) {
|
1827
|
+
char buf[1024];
|
1828
|
+
buf[1023] = '\0';
|
1829
|
+
strncpy(buf, qp->qstr, 1023);
|
1830
|
+
if (qp->clean_str) {
|
1831
|
+
free(qp->qstr);
|
1832
|
+
}
|
1833
|
+
mutex_unlock(&qp->mutex);
|
1834
|
+
RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
|
1835
|
+
" was %se", buf, (char *)msg);
|
1836
|
+
}
|
1837
|
+
return 0;
|
1824
1838
|
}
|
1825
1839
|
|
1840
|
+
#define BQ(query) ((BooleanQuery *)(query))
|
1826
1841
|
|
1827
|
-
|
1842
|
+
/*
 * Return a token stream for +field+ positioned on +text+.
 *
 * Streams are kept in qp->ts_cache keyed by field name. A cached stream is
 * reset onto the new text; otherwise one is obtained from the analyzer and
 * stored under a private copy of the field name.
 */
static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
{
    TokenStream *stream = h_get(qp->ts_cache, field);

    if (stream) {
        stream->reset(stream, text);
        return stream;
    }

    stream = a_get_ts(qp->analyzer, field, text);
    h_set(qp->ts_cache, estrdup(field), stream);
    return stream;
}
|
1853
1854
|
|
1854
|
-
|
1855
|
-
Array *first_cls(BooleanClause *cls)
|
1855
|
+
/*
 * Look +field+ up in +field_cache+ and return the cached copy of the name.
 * On a miss the name is duplicated and stored as both key and value, and
 * that duplicate is returned.
 */
static char *get_cached_field(HashTable *field_cache, const char *field)
{
    char *name = h_get(field_cache, field);

    if (name) {
        return name;
    }

    name = estrdup(field);
    h_set(field_cache, name, name);
    return name;
}
|
1861
1864
|
|
1862
|
-
|
1865
|
+
/*
 * Turn a clause array into a query and release the array wrapper.
 *
 *  - no clauses:  returns NULL.
 *  - one clause:  returns that clause's query; the clause itself is freed.
 *  - otherwise:   returns a new boolean query that takes over the clause
 *                 buffer (count and capacity are copied across).
 *
 * +bca+ is freed in every case.
 */
static Query *get_bool_q(BCArray *bca)
{
    Query *result = NULL;
    const int n = bca->size;

    switch (n) {
        case 0:
            free(bca->clauses);
            break;
        case 1: {
            BooleanClause *only = bca->clauses[0];
            result = only->query;
            free(only);
            free(bca->clauses);
            break;
        }
        default:
            result = bq_new(false);
            /* hand the clause buffer over instead of copying each entry */
            BQ(result)->clause_cnt = n;
            BQ(result)->clause_capa = bca->capa;
            free(BQ(result)->clauses);
            BQ(result)->clauses = bca->clauses;
            break;
    }

    free(bca);
    return result;
}
|
1876
1892
|
|
1877
|
-
|
1893
|
+
/*
 * Append +clause+ to +bca+, doubling the clause buffer first when it is
 * already full.
 */
static void bca_add_clause(BCArray *bca, BooleanClause *clause)
{
    const int at = bca->size;

    if (at >= bca->capa) {
        bca->capa = bca->capa << 1;
        REALLOC_N(bca->clauses, BooleanClause *, bca->capa);
    }
    bca->clauses[at] = clause;
    bca->size = at + 1;
}
|
1882
1902
|
|
1883
|
-
|
1903
|
+
/*
 * Create a clause array with the initial capacity BCA_INIT_CAPA and, when
 * +clause+ is non-NULL, store it as the first entry.
 */
static BCArray *first_cls(BooleanClause *clause)
{
    BCArray *array = ALLOC_AND_ZERO(BCArray);

    array->clauses = ALLOC_N(BooleanClause *, BCA_INIT_CAPA);
    array->capa = BCA_INIT_CAPA;
    if (clause != NULL) {
        bca_add_clause(array, clause);
    }
    return array;
}
|
1892
1913
|
|
1893
|
-
|
1914
|
+
/*
 * Append +clause+ with AND semantics.
 *
 * A NULL clause is ignored. Otherwise, if the array currently holds exactly
 * one clause and it is not prohibited, that clause is switched to BC_MUST;
 * the new clause is likewise switched to BC_MUST unless prohibited, and is
 * then appended. Returns +bca+.
 */
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
{
    if (!clause) {
        return bca;
    }

    if (bca->size == 1 && !bca->clauses[0]->is_prohibited) {
        bc_set_occur(bca->clauses[0], BC_MUST);
    }
    if (!clause->is_prohibited) {
        bc_set_occur(clause, BC_MUST);
    }
    bca_add_clause(bca, clause);
    return bca;
}
|
1898
1929
|
|
1899
|
-
|
1930
|
+
/*
 * Append +clause+ with OR semantics: the clause is added unchanged.
 * A NULL clause is ignored. Returns +bca+.
 */
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
{
    if (clause != NULL) {
        bca_add_clause(bca, clause);
    }
    return bca;
}
|
1921
1937
|
|
1922
|
-
|
1938
|
+
static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
1939
|
+
BooleanClause *clause)
|
1923
1940
|
{
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
q = NULL;
|
1930
|
-
} else {
|
1931
|
-
/* it only makes sense to find one term in a fuzzy query */
|
1932
|
-
Term *term = term_create(field, token->text);
|
1933
|
-
if (slop_str) {
|
1934
|
-
float slop;
|
1935
|
-
sscanf(slop_str, "%f", &slop);
|
1936
|
-
q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
|
1937
|
-
} else {
|
1938
|
-
q = fuzq_create(term);
|
1941
|
+
if (qp->or_default) {
|
1942
|
+
add_or_cls(bca, clause);
|
1943
|
+
}
|
1944
|
+
else {
|
1945
|
+
add_and_cls(bca, clause);
|
1939
1946
|
}
|
1940
|
-
|
1941
|
-
return q;
|
1947
|
+
return bca;
|
1942
1948
|
}
|
1943
1949
|
|
1944
|
-
|
1950
|
+
/*
 * Wrap +q+ in a boolean clause with the given +occur+ value.
 * Returns NULL when +q+ is NULL.
 */
static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
{
    return q ? bc_new(q, occur) : NULL;
}
|
1976
1959
|
|
1977
|
-
|
1960
|
+
/*
 * Build a query for +word+ in +field+ from the tokens the (cached) token
 * stream yields for it:
 *
 *  - no token:          NULL
 *  - exactly one token: a term query
 *  - several tokens:    a phrase query holding every token, each added with
 *                       its own position increment (the first with 0)
 */
static Query *get_term_q(QParser *qp, char *field, char *word)
{
    TokenStream *stream = get_cached_ts(qp, field, word);
    Token *token = ts_next(stream);
    Query *term_q;
    Query *phrase_q;

    if (token == NULL) {
        return NULL;
    }

    /* build the term query before asking the stream for another token */
    term_q = tq_new(field, token->text);
    token = ts_next(stream);
    if (token == NULL) {
        return term_q;
    }

    /* Less likely case: more than one token, so swap the term query for a
     * phrase query seeded with the first term. */
    phrase_q = phq_new(field);
    phq_add_term(phrase_q, ((TermQuery *)term_q)->term, 0);
    term_q->destroy_i(term_q);

    for (; token != NULL; token = ts_next(stream)) {
        phq_add_term(phrase_q, token->text, token->pos_inc);
    }
    return phrase_q;
}
|
1989
1985
|
|
1990
|
-
|
1986
|
+
/*
 * Build a fuzzy query from the first token produced for +word+ in +field+.
 *
 * +slop_str+, when non-NULL, is scanned as a float and replaces the default
 * DEF_MIN_SIM. Returns NULL when the token stream yields no token.
 */
static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
{
    TokenStream *stream = get_cached_ts(qp, field, word);
    Token *token = ts_next(stream);
    float min_sim = DEF_MIN_SIM;

    if (token == NULL) {
        return NULL;
    }

    /* it only makes sense to find one term in a fuzzy query */
    if (slop_str) {
        sscanf(slop_str, "%f", &min_sim);
    }
    return fuzq_new_conf(field, token->text, min_sim, DEF_PRE_LEN,
                         qp->max_clauses);
}
|
1997
2006
|
|
1998
|
-
|
2007
|
+
static char *lower_str(char *str)
|
1999
2008
|
{
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2009
|
+
const int max_len = (int)strlen(str) + 1;
|
2010
|
+
int cnt;
|
2011
|
+
wchar_t *wstr = ALLOC_N(wchar_t, max_len);
|
2012
|
+
if ((cnt = mbstowcs(wstr, str, max_len)) > 0) {
|
2013
|
+
wchar_t *w = wstr;
|
2014
|
+
while (*w) {
|
2015
|
+
*w = towlower(*w);
|
2016
|
+
w++;
|
2017
|
+
}
|
2018
|
+
wcstombs(str, wstr, max_len);
|
2004
2019
|
}
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2009
|
-
|
2010
|
-
|
2020
|
+
else {
|
2021
|
+
char *s = str;
|
2022
|
+
while (*s) {
|
2023
|
+
*s = tolower(*s);
|
2024
|
+
s++;
|
2025
|
+
}
|
2026
|
+
}
|
2027
|
+
free(wstr);
|
2028
|
+
str[max_len] = '\0';
|
2029
|
+
return str;
|
2011
2030
|
}
|
2012
2031
|
|
2013
|
-
|
2014
|
-
Phrase *ph_create()
|
2032
|
+
/*
 * Build a wildcard query for +pattern+ in +field+.
 *
 * The pattern is lower-cased in place first when qp->wild_lower is set.
 * A pattern whose only wildcard character is a trailing '*' is turned into
 * a prefix query instead. The resulting query's maximum term count is set
 * from qp->max_clauses.
 */
static Query *get_wild_q(QParser *qp, char *field, char *pattern)
{
    Query *q;
    bool is_prefix = false;
    int i;
    int len = (int)strlen(pattern);

    if (qp->wild_lower) {
        lower_str(pattern);
    }

    /* simplify the wildcard query to a prefix query if possible. Basically a
     * prefix query is any wildcard query that has a '*' as the last character
     * and no other wildcard characters before it. The length check keeps an
     * empty pattern from being indexed at -1. */
    if (len > 0 && pattern[len - 1] == '*') {
        is_prefix = true;
        /* scan by index so no pointer before the array is ever formed */
        for (i = len - 2; i >= 0; i--) {
            if (pattern[i] == '*' || pattern[i] == '?') {
                is_prefix = false;
                break;
            }
        }
    }
    if (is_prefix) {
        /* chop off the '*' temporarily to create the query */
        pattern[len - 1] = 0;
        q = prefixq_new(field, pattern);
        pattern[len - 1] = '*';
    }
    else {
        q = wcq_new(field, pattern);
    }
    MTQMaxTerms(q) = qp->max_clauses;
    return q;
}
|
2024
2067
|
|
2025
|
-
|
2068
|
+
/*
 * Add +field+ to the parser's current field set and return that set.
 *
 * The field is only added when any field is allowed or it is a member of
 * qp->all_fields; the stored name is the copy held in qp->field_cache.
 */
static HashSet *add_field(QParser *qp, char *field)
{
    if (!qp->allow_any_fields && !hs_exists(qp->all_fields, field)) {
        return qp->fields;
    }

    hs_add(qp->fields, get_cached_field(qp->field_cache, field));
    return qp->fields;
}
|
2036
2075
|
|
2037
|
-
|
2076
|
+
/*
 * Start a fresh field list: point qp->fields at the parser's scratch set,
 * empty it, then add +field+ through add_field(). Returns the field set.
 */
static HashSet *first_field(QParser *qp, char *field)
{
    HashSet *scratch = qp->fields_buf;

    qp->fields = scratch;
    scratch->size = 0;
    h_clear(scratch->ht);
    return add_field(qp, field);
}
|
2053
2083
|
|
2054
|
-
|
2084
|
+
/*
 * Release a Phrase: the term array of every used position (each term is
 * freed with free()), the position buffer and the Phrase itself.
 */
static void ph_destroy(Phrase *self)
{
    int pos = 0;

    while (pos < self->size) {
        ary_destroy(self->positions[pos].terms, &free);
        pos++;
    }
    free(self->positions);
    free(self);
}
|
2057
2093
|
|
2058
|
-
if (!word) return self; /* no point in adding NULL in multi */
|
2059
2094
|
|
2060
|
-
|
2061
|
-
|
2062
|
-
|
2063
|
-
|
2064
|
-
self->
|
2065
|
-
self->w_cnt[i]++;
|
2095
|
+
static Phrase *ph_new()
|
2096
|
+
{
|
2097
|
+
Phrase *self = ALLOC_AND_ZERO(Phrase);
|
2098
|
+
self->capa = PHRASE_INIT_CAPA;
|
2099
|
+
self->positions = ALLOC_AND_ZERO_N(PhrasePosition, PHRASE_INIT_CAPA);
|
2066
2100
|
return self;
|
2067
2101
|
}
|
2068
2102
|
|
2069
|
-
|
2103
|
+
static Phrase *ph_first_word(char *word)
|
2070
2104
|
{
|
2071
|
-
|
2072
|
-
|
2073
|
-
|
2074
|
-
|
2075
|
-
|
2076
|
-
|
2077
|
-
Query *pq = phq_create();
|
2078
|
-
((PhraseQuery *)pq->data)->slop = slop;
|
2079
|
-
|
2080
|
-
for (i = 0; i < phrase->cnt; i++) {
|
2081
|
-
word = phrase->words[i][0];
|
2082
|
-
if (!word) {
|
2083
|
-
pos_inc++;
|
2084
|
-
} else {
|
2085
|
-
stream = a_get_ts(qp->analyzer, field, word);
|
2086
|
-
while ((token = ts_next(stream))) {
|
2087
|
-
phq_add_term(pq, term_create(field, token->text),
|
2088
|
-
token->pos_inc + pos_inc);
|
2089
|
-
pos_inc = 0;
|
2090
|
-
}
|
2105
|
+
Phrase *self = ph_new();
|
2106
|
+
if (word) { /* no point in adding NULL in start */
|
2107
|
+
self->positions[0].terms = ary_new_type_capa(char *, 1);
|
2108
|
+
ary_push(self->positions[0].terms, estrdup(word));
|
2109
|
+
self->size = 1;
|
2091
2110
|
}
|
2092
|
-
|
2093
|
-
return pq;
|
2111
|
+
return self;
|
2094
2112
|
}
|
2095
2113
|
|
2096
|
-
|
2114
|
+
/*
 * Append a new position holding a copy of +word+ to the phrase.
 *
 * A NULL word adds no position; it only bumps the pending position
 * increment, which is stored in the next real position's +pos+ field and
 * then reset. The position buffer is doubled when full. Returns +self+.
 */
static Phrase *ph_add_word(Phrase *self, char *word)
{
    int slot;

    if (!word) {
        self->pos_inc++;
        return self;
    }

    slot = self->size;
    if (slot >= self->capa) {
        self->capa <<= 1;
        REALLOC_N(self->positions, PhrasePosition, self->capa);
    }
    self->positions[slot].pos = self->pos_inc;
    self->positions[slot].terms = ary_new_type_capa(char *, 1);
    ary_push(self->positions[slot].terms, estrdup(word));
    self->size++;
    self->pos_inc = 0;
    return self;
}
|
2141
2135
|
|
2142
|
-
|
2136
|
+
/*
 * Add a copy of +word+ as an alternative term at the phrase's last
 * position. A NULL word is ignored. Returns +self+.
 *
 * NOTE(review): the last position is taken as size - 1, so this relies on
 * the phrase already holding at least one position - confirm with callers.
 */
static Phrase *ph_add_multi_word(Phrase *self, char *word)
{
    const int last = self->size - 1;

    if (word != NULL) {
        ary_push(self->positions[last].terms, estrdup(word));
    }
    return self;
}
|
2146
|
+
|
2147
|
+
static Query *get_phrase_query(QParser *qp, char *field,
|
2148
|
+
Phrase *phrase, char *slop_str)
|
2149
|
+
{
|
2150
|
+
const int pos_cnt = phrase->size;
|
2151
|
+
Query *q = NULL;
|
2152
|
+
|
2153
|
+
if (pos_cnt == 1) {
|
2154
|
+
char **words = phrase->positions[0].terms;
|
2155
|
+
const int word_count = ary_size(words);
|
2156
|
+
if (word_count == 1) {
|
2157
|
+
q = get_term_q(qp, field, words[0]);
|
2158
|
+
}
|
2159
|
+
else {
|
2160
|
+
int i;
|
2161
|
+
q = bq_new(false);
|
2162
|
+
for (i = 0; i < word_count; i++) {
|
2163
|
+
bq_add_query_nr(q, get_term_q(qp, field, words[i]), BC_SHOULD);
|
2164
|
+
}
|
2165
|
+
}
|
2165
2166
|
}
|
2166
|
-
|
2167
|
-
|
2168
|
-
|
2169
|
-
|
2170
|
-
|
2171
|
-
|
2167
|
+
else if (pos_cnt > 1) {
|
2168
|
+
Token *token;
|
2169
|
+
TokenStream *stream;
|
2170
|
+
int i, j;
|
2171
|
+
q = phq_new(field);
|
2172
|
+
if (slop_str) {
|
2173
|
+
int slop;
|
2174
|
+
sscanf(slop_str,"%d",&slop);
|
2175
|
+
((PhraseQuery *)q)->slop = slop;
|
2176
|
+
}
|
2177
|
+
|
2178
|
+
for (i = 0; i < pos_cnt; i++) {
|
2179
|
+
int pos_inc = phrase->positions[i].pos; /* Actually holds pos_inc */
|
2180
|
+
char **words = phrase->positions[i].terms;
|
2181
|
+
const int word_count = ary_size(words);
|
2182
|
+
|
2183
|
+
if (word_count == 1) {
|
2184
|
+
stream = get_cached_ts(qp, field, words[0]);
|
2185
|
+
while ((token = ts_next(stream))) {
|
2186
|
+
phq_add_term(q, token->text, token->pos_inc + pos_inc);
|
2187
|
+
pos_inc = 0;
|
2188
|
+
}
|
2189
|
+
}
|
2190
|
+
else {
|
2191
|
+
bool added_position = false;
|
2192
|
+
|
2193
|
+
for (j = 0; j < word_count; j++) {
|
2194
|
+
stream = get_cached_ts(qp, field, words[j]);
|
2195
|
+
if ((token = ts_next(stream))) {
|
2196
|
+
if (!added_position) {
|
2197
|
+
phq_add_term(q, token->text, token->pos_inc + pos_inc);
|
2198
|
+
added_position = true;
|
2199
|
+
}
|
2200
|
+
else {
|
2201
|
+
phq_append_multi_term(q, token->text);
|
2202
|
+
}
|
2203
|
+
}
|
2204
|
+
}
|
2205
|
+
}
|
2206
|
+
}
|
2172
2207
|
}
|
2173
|
-
|
2174
|
-
|
2175
|
-
|
2208
|
+
return q;
|
2209
|
+
}
|
2210
|
+
|
2211
|
+
/*
 * Build the query for +phrase+ through the FLDS macro, which is handed the
 * per-field expression get_phrase_query(qp, field, ...), then destroy the
 * phrase. The phrase must not be used after this call.
 */
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
{
    Query *q;

    FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
    ph_destroy(phrase);

    return q;
}
|
2177
2218
|
|
2178
|
-
Query *get_range_q(char *field, char *from, char *to,
|
2219
|
+
/*
 * Thin wrapper: create a range query on +field+ with bounds +from+ / +to+;
 * +inc_lower+ and +inc_upper+ are passed through to rq_new() unchanged.
 */
static Query *get_range_q(const char *field, const char *from, const char *to,
                          bool inc_lower, bool inc_upper)
{
    Query *range_q = rq_new(field, from, to, inc_lower, inc_upper);
    return range_q;
}
|
2182
2224
|
|
2183
2225
|
/*
 * Destroy a query parser: its field sets, the field-name and token-stream
 * caches, its reference to the analyzer, and the parser itself.
 *
 * def_fields is only destroyed when close_def_fields is set; otherwise it
 * is the same set as all_fields (see qp_new).
 *
 * NOTE(review): the mutex initialised in qp_new is not torn down here -
 * confirm whether the threading layer requires that.
 */
void qp_destroy(QParser *self)
{
    HashSet *own_defaults = self->close_def_fields ? self->def_fields : NULL;

    if (own_defaults) {
        hs_destroy(own_defaults);
    }
    hs_destroy(self->all_fields);
    hs_destroy(self->fields_buf);
    h_destroy(self->field_cache);
    h_destroy(self->ts_cache);
    a_deref(self->analyzer);
    free(self);
}
|
2191
2237
|
|
2192
|
-
QParser *
|
2238
|
+
/*
 * Create a query parser.
 *
 * all_fields  - set of known field names; stored in the parser and
 *               destroyed by qp_destroy().
 * def_fields  - fields searched when a query names none; may be NULL, in
 *               which case all_fields doubles as the default set.
 * analyzer    - analyzer used to tokenize query text; qp_destroy() drops a
 *               reference to it.
 *
 * Defaults: OR as default operator, wildcard patterns lower-cased, no query
 * cleaning, parse errors raised rather than handled, only known fields
 * allowed, default slop 0, QP_MAX_CLAUSES as clause limit.
 */
QParser *qp_new(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
{
    int idx;
    QParser *self = ALLOC(QParser);

    /* behaviour switches */
    self->or_default = true;
    self->wild_lower = true;
    self->clean_str = false;
    self->max_clauses = QP_MAX_CLAUSES;
    self->handle_parse_errors = false;
    self->allow_any_fields = false;
    self->def_slop = 0;

    self->fields_buf = hs_new_str(NULL);
    self->all_fields = all_fields;

    if (!def_fields) {
        /* shared with all_fields, so it must not be destroyed separately */
        self->def_fields = all_fields;
        self->close_def_fields = false;
    }
    else {
        self->def_fields = def_fields;
        /* make sure all_fields contains the default fields */
        for (idx = 0; idx < self->def_fields->size; idx++) {
            if (!hs_exists(self->all_fields, self->def_fields->elems[idx])) {
                hs_add(self->all_fields,
                       estrdup(self->def_fields->elems[idx]));
            }
        }
        self->close_def_fields = true;
    }

    /* seed the field-name cache with a private copy of every known field;
     * each copy serves as both key and value */
    self->field_cache = h_new_str((free_ft)NULL, &free);
    for (idx = 0; idx < self->all_fields->size; idx++) {
        char *name = estrdup(self->all_fields->elems[idx]);
        h_set(self->field_cache, name, name);
    }

    self->fields = self->def_fields;
    self->analyzer = analyzer;
    self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
    self->buf_index = 0;
    mutex_init(&self->mutex, NULL);
    return self;
}
|
2222
2277
|
|
2223
2278
|
/* these chars have meaning within phrases */
|
2224
2279
|
static const char *PHRASE_CHARS = "<>|\"";
|
2225
2280
|
|
2226
|
-
void str_insert(char *str, int len, char chr)
|
2281
|
+
/*
 * Insert +chr+ at the front of +str+ by shifting the first +len+ bytes one
 * place to the right. The caller must provide room for len + 1 bytes; no
 * terminator is written.
 */
static void str_insert(char *str, int len, char chr)
{
    /* source and destination overlap, hence memmove rather than memcpy */
    memmove(&str[1], &str[0], len * sizeof(char));
    str[0] = chr;
}
|
2231
2286
|
|
2232
2287
|
char *qp_clean_str(char *str)
|
2233
2288
|
{
|
2234
|
-
|
2235
|
-
|
2236
|
-
|
2237
|
-
|
2238
|
-
|
2239
|
-
|
2240
|
-
|
2241
|
-
|
2242
|
-
|
2243
|
-
|
2244
|
-
|
2245
|
-
|
2246
|
-
|
2247
|
-
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
continue;
|
2254
|
-
}
|
2255
|
-
switch (b) {
|
2256
|
-
case '\\':
|
2257
|
-
if (!quote_open) /* We do our own escaping below */
|
2258
|
-
*nsp++ = b;
|
2259
|
-
break;
|
2260
|
-
case '"':
|
2261
|
-
quote_open = !quote_open;
|
2262
|
-
*nsp++ = b;
|
2263
|
-
break;
|
2264
|
-
case '(':
|
2265
|
-
if (!quote_open) {
|
2266
|
-
br_cnt++;
|
2267
|
-
} else {
|
2268
|
-
*nsp++ = '\\';
|
2269
|
-
}
|
2270
|
-
*nsp++ = b;
|
2271
|
-
break;
|
2272
|
-
case ')':
|
2273
|
-
if (!quote_open) {
|
2274
|
-
if (br_cnt == 0) {
|
2275
|
-
str_insert(new_str, (int)(nsp - new_str), '(');
|
2276
|
-
nsp++;
|
2277
|
-
} else {
|
2278
|
-
br_cnt--;
|
2279
|
-
}
|
2280
|
-
} else {
|
2281
|
-
*nsp++ = '\\';
|
2282
|
-
}
|
2283
|
-
*nsp++ = b;
|
2284
|
-
break;
|
2285
|
-
case '>':
|
2286
|
-
if (quote_open) {
|
2287
|
-
if (pb == '<') {
|
2288
|
-
/* remove the escape character */
|
2289
|
-
nsp--;
|
2290
|
-
nsp[-1] = '<';
|
2291
|
-
} else {
|
2292
|
-
*nsp++ = '\\';
|
2293
|
-
}
|
2289
|
+
int b, pb = -1;
|
2290
|
+
int br_cnt = 0;
|
2291
|
+
bool quote_open = false;
|
2292
|
+
char *sp, *nsp;
|
2293
|
+
|
2294
|
+
/* leave a little extra */
|
2295
|
+
char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
|
2296
|
+
|
2297
|
+
for (sp = str, nsp = new_str; *sp; sp++) {
|
2298
|
+
b = *sp;
|
2299
|
+
/* ignore escaped characters */
|
2300
|
+
if (pb == '\\') {
|
2301
|
+
if (quote_open && strrchr(PHRASE_CHARS, b)) {
|
2302
|
+
*nsp++ = '\\'; /* this was left off the first time through */
|
2303
|
+
}
|
2304
|
+
*nsp++ = b;
|
2305
|
+
/* \\ has escaped itself so has no power. Assign pb random char : */
|
2306
|
+
pb = ((b == '\\') ? ':' : b);
|
2307
|
+
continue;
|
2294
2308
|
}
|
2295
|
-
|
2296
|
-
|
2297
|
-
|
2298
|
-
|
2299
|
-
|
2300
|
-
|
2301
|
-
|
2309
|
+
switch (b) {
|
2310
|
+
case '\\':
|
2311
|
+
if (!quote_open) { /* We do our own escaping below */
|
2312
|
+
*nsp++ = b;
|
2313
|
+
}
|
2314
|
+
break;
|
2315
|
+
case '"':
|
2316
|
+
quote_open = !quote_open;
|
2317
|
+
*nsp++ = b;
|
2318
|
+
break;
|
2319
|
+
case '(':
|
2320
|
+
if (!quote_open) {
|
2321
|
+
br_cnt++;
|
2322
|
+
}
|
2323
|
+
else {
|
2324
|
+
*nsp++ = '\\';
|
2325
|
+
}
|
2326
|
+
*nsp++ = b;
|
2327
|
+
break;
|
2328
|
+
case ')':
|
2329
|
+
if (!quote_open) {
|
2330
|
+
if (br_cnt == 0) {
|
2331
|
+
str_insert(new_str, (int)(nsp - new_str), '(');
|
2332
|
+
nsp++;
|
2333
|
+
}
|
2334
|
+
else {
|
2335
|
+
br_cnt--;
|
2336
|
+
}
|
2337
|
+
}
|
2338
|
+
else {
|
2339
|
+
*nsp++ = '\\';
|
2340
|
+
}
|
2341
|
+
*nsp++ = b;
|
2342
|
+
break;
|
2343
|
+
case '>':
|
2344
|
+
if (quote_open) {
|
2345
|
+
if (pb == '<') {
|
2346
|
+
/* remove the escape character */
|
2347
|
+
nsp--;
|
2348
|
+
nsp[-1] = '<';
|
2349
|
+
}
|
2350
|
+
else {
|
2351
|
+
*nsp++ = '\\';
|
2352
|
+
}
|
2353
|
+
}
|
2354
|
+
*nsp++ = b;
|
2355
|
+
break;
|
2356
|
+
default:
|
2357
|
+
if (quote_open) {
|
2358
|
+
if (strrchr(special_char, b) && b != '|') {
|
2359
|
+
*nsp++ = '\\';
|
2360
|
+
}
|
2361
|
+
}
|
2362
|
+
*nsp++ = b;
|
2302
2363
|
}
|
2303
|
-
|
2364
|
+
pb = b;
|
2365
|
+
}
|
2366
|
+
if (quote_open) {
|
2367
|
+
*nsp++ = '"';
|
2304
2368
|
}
|
2305
|
-
|
2306
|
-
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2310
|
-
}
|
2311
|
-
*nsp = '\0';
|
2312
|
-
return new_str;
|
2369
|
+
for (;br_cnt > 0; br_cnt--) {
|
2370
|
+
*nsp++ = ')';
|
2371
|
+
}
|
2372
|
+
*nsp = '\0';
|
2373
|
+
return new_str;
|
2313
2374
|
}
|
2314
2375
|
|
2315
2376
|
/*
 * Fallback query for text that could not be parsed: the FLDS macro is
 * handed the per-field expression get_term_q(qp, field, str) and stores
 * its result in q, which is returned.
 */
Query *qp_get_bad_query(QParser *qp, char *str)
{
    Query *q;

    FLDS(q, get_term_q(qp, field, str));

    return q;
}
|
2321
2382
|
|
2322
2383
|
/*
 * Parse +qstr+ into a query while holding the parser's mutex.
 *
 * When clean_str is set the text is first passed through qp_clean_str();
 * that copy is freed before returning. If parsing leaves no result and
 * handle_parse_errors is set, qp_get_bad_query() is tried on the query
 * text. If there is still no result an empty boolean query is returned, so
 * the return value is never NULL.
 */
Query *qp_parse(QParser *self, char *qstr)
{
    Query *parsed;

    mutex_lock(&self->mutex);

    self->qstr = self->clean_str ? qp_clean_str(qstr) : qstr;
    self->qstrp = self->qstr;
    self->fields = self->def_fields;
    self->result = NULL;

    yyparse(self);

    parsed = self->result;
    if (parsed == NULL && self->handle_parse_errors) {
        parsed = qp_get_bad_query(self, self->qstr);
    }
    if (parsed == NULL) {
        parsed = bq_new(false);
    }

    /* the cleaned copy belongs to this call */
    if (self->clean_str) {
        free(self->qstr);
    }

    mutex_unlock(&self->mutex);
    return parsed;
}
|
2338
2412
|
|
2339
2413
|
|