ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
data/ext/q_parser.c
CHANGED
@@ -58,6 +58,14 @@
|
|
58
58
|
/* Using locations. */
|
59
59
|
#define YYLSP_NEEDED 0
|
60
60
|
|
61
|
+
/* Substitute the variable and function names. */
|
62
|
+
#define yyparse frt_parse
|
63
|
+
#define yylex frt_lex
|
64
|
+
#define yyerror frt_error
|
65
|
+
#define yylval frt_lval
|
66
|
+
#define yychar frt_char
|
67
|
+
#define yydebug frt_debug
|
68
|
+
#define yynerrs frt_nerrs
|
61
69
|
|
62
70
|
|
63
71
|
/* Tokens. */
|
@@ -90,14 +98,17 @@
|
|
90
98
|
|
91
99
|
|
92
100
|
/* Copy the first part of user declarations. */
|
93
|
-
#line
|
101
|
+
#line 84 "src/q_parser.y"
|
94
102
|
|
95
103
|
#include <string.h>
|
96
104
|
#include <ctype.h>
|
97
105
|
#include <wctype.h>
|
106
|
+
#include <assert.h>
|
98
107
|
#include "except.h"
|
99
108
|
#include "search.h"
|
100
109
|
#include "array.h"
|
110
|
+
#include "symbol.h"
|
111
|
+
#include "internal.h"
|
101
112
|
|
102
113
|
typedef struct Phrase {
|
103
114
|
int size;
|
@@ -138,7 +149,7 @@ int qp_default_fuzzy_pre_len = 0;
|
|
138
149
|
|
139
150
|
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
140
151
|
typedef union YYSTYPE
|
141
|
-
#line
|
152
|
+
#line 113 "src/q_parser.y"
|
142
153
|
{
|
143
154
|
Query *query;
|
144
155
|
BooleanClause *bcls;
|
@@ -148,7 +159,7 @@ typedef union YYSTYPE
|
|
148
159
|
char *str;
|
149
160
|
}
|
150
161
|
/* Line 187 of yacc.c. */
|
151
|
-
#line
|
162
|
+
#line 163 "src/q_parser.c"
|
152
163
|
YYSTYPE;
|
153
164
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
154
165
|
# define YYSTYPE_IS_DECLARED 1
|
@@ -158,7 +169,7 @@ typedef union YYSTYPE
|
|
158
169
|
|
159
170
|
|
160
171
|
/* Copy the second part of user declarations. */
|
161
|
-
#line
|
172
|
+
#line 121 "src/q_parser.y"
|
162
173
|
|
163
174
|
static int yylex(YYSTYPE *lvalp, QParser *qp);
|
164
175
|
static int yyerror(QParser *qp, char const *msg);
|
@@ -169,17 +180,19 @@ static Query *get_bool_q(BCArray *bca);
|
|
169
180
|
static BCArray *first_cls(BooleanClause *boolean_clause);
|
170
181
|
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
|
171
182
|
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
|
172
|
-
static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
183
|
+
static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
184
|
+
BooleanClause *clause);
|
173
185
|
static void bca_destroy(BCArray *bca);
|
174
186
|
|
175
|
-
static BooleanClause *get_bool_cls(Query *q,
|
187
|
+
static BooleanClause *get_bool_cls(Query *q, BCType occur);
|
176
188
|
|
177
|
-
static Query *get_term_q(QParser *qp,
|
178
|
-
static Query *get_fuzzy_q(QParser *qp,
|
179
|
-
|
189
|
+
static Query *get_term_q(QParser *qp, Symbol field, char *word);
|
190
|
+
static Query *get_fuzzy_q(QParser *qp, Symbol field, char *word,
|
191
|
+
char *slop);
|
192
|
+
static Query *get_wild_q(QParser *qp, Symbol field, char *pattern);
|
180
193
|
|
181
|
-
static HashSet *first_field(QParser *qp, char *field);
|
182
|
-
static HashSet *add_field(QParser *qp, char *field);
|
194
|
+
static HashSet *first_field(QParser *qp, const char *field);
|
195
|
+
static HashSet *add_field(QParser *qp, const char *field);
|
183
196
|
|
184
197
|
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
|
185
198
|
|
@@ -188,22 +201,32 @@ static Phrase *ph_add_word(Phrase *self, char *word);
|
|
188
201
|
static Phrase *ph_add_multi_word(Phrase *self, char *word);
|
189
202
|
static void ph_destroy(Phrase *self);
|
190
203
|
|
191
|
-
static Query *get_r_q(QParser *qp,
|
204
|
+
static Query *get_r_q(QParser *qp, Symbol field, char *from, char *to,
|
192
205
|
bool inc_lower, bool inc_upper);
|
193
206
|
|
207
|
+
static void qp_push_fields(QParser *self, HashSet *fields, bool destroy);
|
208
|
+
static void qp_pop_fields(QParser *self);
|
209
|
+
|
210
|
+
/**
|
211
|
+
* +FLDS+ calls +func+ for all fields on top of the field stack. +func+
|
212
|
+
* must return a query. If there is more than one field on top of FieldStack
|
213
|
+
* then +FLDS+ will combine all the queries returned by +func+ into a single
|
214
|
+
* BooleanQuery which it then assigns to +q+. If there is only one field, the
|
215
|
+
* return value of +func+ is assigned to +q+ directly.
|
216
|
+
*/
|
194
217
|
#define FLDS(q, func) do {\
|
195
218
|
TRY {\
|
196
|
-
|
219
|
+
Symbol field;\
|
197
220
|
if (qp->fields->size == 0) {\
|
198
221
|
q = NULL;\
|
199
222
|
} else if (qp->fields->size == 1) {\
|
200
|
-
field = (
|
223
|
+
field = (Symbol)qp->fields->first->elem;\
|
201
224
|
q = func;\
|
202
225
|
} else {\
|
203
|
-
|
226
|
+
Query *volatile sq; HashSetEntry *volatile hse;\
|
204
227
|
q = bq_new_max(false, qp->max_clauses);\
|
205
|
-
for (
|
206
|
-
field = (
|
228
|
+
for (hse = qp->fields->first; hse; hse = hse->next) {\
|
229
|
+
field = (Symbol)hse->elem;\
|
207
230
|
sq = func;\
|
208
231
|
TRY\
|
209
232
|
if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
|
@@ -234,7 +257,7 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
|
|
234
257
|
|
235
258
|
|
236
259
|
/* Line 216 of yacc.c. */
|
237
|
-
#line
|
260
|
+
#line 261 "src/q_parser.c"
|
238
261
|
|
239
262
|
#ifdef short
|
240
263
|
# undef short
|
@@ -535,14 +558,14 @@ static const yytype_int8 yyrhs[] =
|
|
535
558
|
};
|
536
559
|
|
537
560
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
538
|
-
static const
|
561
|
+
static const yytype_uint16 yyrline[] =
|
539
562
|
{
|
540
|
-
0,
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
563
|
+
0, 226, 226, 227, 229, 230, 231, 232, 234, 235,
|
564
|
+
236, 238, 239, 241, 242, 243, 244, 245, 246, 247,
|
565
|
+
249, 250, 251, 253, 255, 255, 257, 257, 257, 260,
|
566
|
+
261, 263, 264, 265, 266, 268, 269, 270, 271, 272,
|
567
|
+
274, 275, 276, 277, 278, 279, 280, 281, 282, 283,
|
568
|
+
284, 285
|
546
569
|
};
|
547
570
|
#endif
|
548
571
|
|
@@ -1201,59 +1224,59 @@ yydestruct (yymsg, yytype, yyvaluep, qp)
|
|
1201
1224
|
switch (yytype)
|
1202
1225
|
{
|
1203
1226
|
case 27: /* "bool_q" */
|
1204
|
-
#line
|
1227
|
+
#line 221 "src/q_parser.y"
|
1205
1228
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1206
|
-
#line
|
1229
|
+
#line 1230 "src/q_parser.c"
|
1207
1230
|
break;
|
1208
1231
|
case 28: /* "bool_clss" */
|
1209
|
-
#line
|
1232
|
+
#line 223 "src/q_parser.y"
|
1210
1233
|
{ if ((yyvaluep->bclss) && qp->destruct) bca_destroy((yyvaluep->bclss)); };
|
1211
|
-
#line
|
1234
|
+
#line 1235 "src/q_parser.c"
|
1212
1235
|
break;
|
1213
1236
|
case 29: /* "bool_cls" */
|
1214
|
-
#line
|
1237
|
+
#line 222 "src/q_parser.y"
|
1215
1238
|
{ if ((yyvaluep->bcls) && qp->destruct) bc_deref((yyvaluep->bcls)); };
|
1216
|
-
#line
|
1239
|
+
#line 1240 "src/q_parser.c"
|
1217
1240
|
break;
|
1218
1241
|
case 30: /* "boosted_q" */
|
1219
|
-
#line
|
1242
|
+
#line 221 "src/q_parser.y"
|
1220
1243
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1221
|
-
#line
|
1244
|
+
#line 1245 "src/q_parser.c"
|
1222
1245
|
break;
|
1223
1246
|
case 31: /* "q" */
|
1224
|
-
#line
|
1247
|
+
#line 221 "src/q_parser.y"
|
1225
1248
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1226
|
-
#line
|
1249
|
+
#line 1250 "src/q_parser.c"
|
1227
1250
|
break;
|
1228
1251
|
case 32: /* "term_q" */
|
1229
|
-
#line
|
1252
|
+
#line 221 "src/q_parser.y"
|
1230
1253
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1231
|
-
#line
|
1254
|
+
#line 1255 "src/q_parser.c"
|
1232
1255
|
break;
|
1233
1256
|
case 33: /* "wild_q" */
|
1234
|
-
#line
|
1257
|
+
#line 221 "src/q_parser.y"
|
1235
1258
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1236
|
-
#line
|
1259
|
+
#line 1260 "src/q_parser.c"
|
1237
1260
|
break;
|
1238
1261
|
case 34: /* "field_q" */
|
1239
|
-
#line
|
1262
|
+
#line 221 "src/q_parser.y"
|
1240
1263
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1241
|
-
#line
|
1264
|
+
#line 1265 "src/q_parser.c"
|
1242
1265
|
break;
|
1243
1266
|
case 39: /* "phrase_q" */
|
1244
|
-
#line
|
1267
|
+
#line 221 "src/q_parser.y"
|
1245
1268
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1246
|
-
#line
|
1269
|
+
#line 1270 "src/q_parser.c"
|
1247
1270
|
break;
|
1248
1271
|
case 40: /* "ph_words" */
|
1249
|
-
#line
|
1272
|
+
#line 224 "src/q_parser.y"
|
1250
1273
|
{ if ((yyvaluep->phrase) && qp->destruct) ph_destroy((yyvaluep->phrase)); };
|
1251
|
-
#line
|
1274
|
+
#line 1275 "src/q_parser.c"
|
1252
1275
|
break;
|
1253
1276
|
case 41: /* "range_q" */
|
1254
|
-
#line
|
1277
|
+
#line 221 "src/q_parser.y"
|
1255
1278
|
{ if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); };
|
1256
|
-
#line
|
1279
|
+
#line 1280 "src/q_parser.c"
|
1257
1280
|
break;
|
1258
1281
|
|
1259
1282
|
default:
|
@@ -1562,228 +1585,228 @@ yyreduce:
|
|
1562
1585
|
switch (yyn)
|
1563
1586
|
{
|
1564
1587
|
case 2:
|
1565
|
-
#line
|
1588
|
+
#line 226 "src/q_parser.y"
|
1566
1589
|
{ qp->result = (yyval.query) = NULL; }
|
1567
1590
|
break;
|
1568
1591
|
|
1569
1592
|
case 3:
|
1570
|
-
#line
|
1593
|
+
#line 227 "src/q_parser.y"
|
1571
1594
|
{ T qp->result = (yyval.query) = get_bool_q((yyvsp[(1) - (1)].bclss)); E }
|
1572
1595
|
break;
|
1573
1596
|
|
1574
1597
|
case 4:
|
1575
|
-
#line
|
1598
|
+
#line 229 "src/q_parser.y"
|
1576
1599
|
{ T (yyval.bclss) = first_cls((yyvsp[(1) - (1)].bcls)); E }
|
1577
1600
|
break;
|
1578
1601
|
|
1579
1602
|
case 5:
|
1580
|
-
#line
|
1603
|
+
#line 230 "src/q_parser.y"
|
1581
1604
|
{ T (yyval.bclss) = add_and_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
|
1582
1605
|
break;
|
1583
1606
|
|
1584
1607
|
case 6:
|
1585
|
-
#line
|
1608
|
+
#line 231 "src/q_parser.y"
|
1586
1609
|
{ T (yyval.bclss) = add_or_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E }
|
1587
1610
|
break;
|
1588
1611
|
|
1589
1612
|
case 7:
|
1590
|
-
#line
|
1613
|
+
#line 232 "src/q_parser.y"
|
1591
1614
|
{ T (yyval.bclss) = add_default_cls(qp, (yyvsp[(1) - (2)].bclss), (yyvsp[(2) - (2)].bcls)); E }
|
1592
1615
|
break;
|
1593
1616
|
|
1594
1617
|
case 8:
|
1595
|
-
#line
|
1618
|
+
#line 234 "src/q_parser.y"
|
1596
1619
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST); E }
|
1597
1620
|
break;
|
1598
1621
|
|
1599
1622
|
case 9:
|
1600
|
-
#line
|
1623
|
+
#line 235 "src/q_parser.y"
|
1601
1624
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST_NOT); E }
|
1602
1625
|
break;
|
1603
1626
|
|
1604
1627
|
case 10:
|
1605
|
-
#line
|
1628
|
+
#line 236 "src/q_parser.y"
|
1606
1629
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[(1) - (1)].query), BC_SHOULD); E }
|
1607
1630
|
break;
|
1608
1631
|
|
1609
1632
|
case 12:
|
1610
|
-
#line
|
1633
|
+
#line 239 "src/q_parser.y"
|
1611
1634
|
{ T if ((yyvsp[(1) - (3)].query)) sscanf((yyvsp[(3) - (3)].str),"%f",&((yyvsp[(1) - (3)].query)->boost)); (yyval.query)=(yyvsp[(1) - (3)].query); E }
|
1612
1635
|
break;
|
1613
1636
|
|
1614
1637
|
case 14:
|
1615
|
-
#line
|
1638
|
+
#line 242 "src/q_parser.y"
|
1616
1639
|
{ T (yyval.query) = bq_new_max(true, qp->max_clauses); E }
|
1617
1640
|
break;
|
1618
1641
|
|
1619
1642
|
case 15:
|
1620
|
-
#line
|
1643
|
+
#line 243 "src/q_parser.y"
|
1621
1644
|
{ T (yyval.query) = get_bool_q((yyvsp[(2) - (3)].bclss)); E }
|
1622
1645
|
break;
|
1623
1646
|
|
1624
1647
|
case 20:
|
1625
|
-
#line
|
1648
|
+
#line 249 "src/q_parser.y"
|
1626
1649
|
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
|
1627
1650
|
break;
|
1628
1651
|
|
1629
1652
|
case 21:
|
1630
|
-
#line
|
1653
|
+
#line 250 "src/q_parser.y"
|
1631
1654
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str))); Y}
|
1632
1655
|
break;
|
1633
1656
|
|
1634
1657
|
case 22:
|
1635
|
-
#line
|
1658
|
+
#line 251 "src/q_parser.y"
|
1636
1659
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (2)].str), NULL)); Y}
|
1637
1660
|
break;
|
1638
1661
|
|
1639
1662
|
case 23:
|
1640
|
-
#line
|
1663
|
+
#line 253 "src/q_parser.y"
|
1641
1664
|
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[(1) - (1)].str))); Y}
|
1642
1665
|
break;
|
1643
1666
|
|
1644
1667
|
case 24:
|
1645
|
-
#line
|
1646
|
-
{ qp
|
1668
|
+
#line 255 "src/q_parser.y"
|
1669
|
+
{ qp_pop_fields(qp); }
|
1647
1670
|
break;
|
1648
1671
|
|
1649
1672
|
case 25:
|
1650
|
-
#line
|
1673
|
+
#line 256 "src/q_parser.y"
|
1651
1674
|
{ (yyval.query) = (yyvsp[(3) - (4)].query); }
|
1652
1675
|
break;
|
1653
1676
|
|
1654
1677
|
case 26:
|
1655
|
-
#line
|
1656
|
-
{ qp
|
1678
|
+
#line 257 "src/q_parser.y"
|
1679
|
+
{ qp_push_fields(qp, qp->all_fields, false); }
|
1657
1680
|
break;
|
1658
1681
|
|
1659
1682
|
case 27:
|
1660
|
-
#line
|
1661
|
-
{
|
1683
|
+
#line 257 "src/q_parser.y"
|
1684
|
+
{ qp_pop_fields(qp); }
|
1662
1685
|
break;
|
1663
1686
|
|
1664
1687
|
case 28:
|
1665
|
-
#line
|
1688
|
+
#line 258 "src/q_parser.y"
|
1666
1689
|
{ (yyval.query) = (yyvsp[(4) - (5)].query); }
|
1667
1690
|
break;
|
1668
1691
|
|
1669
1692
|
case 29:
|
1670
|
-
#line
|
1693
|
+
#line 260 "src/q_parser.y"
|
1671
1694
|
{ (yyval.hashset) = first_field(qp, (yyvsp[(1) - (1)].str)); }
|
1672
1695
|
break;
|
1673
1696
|
|
1674
1697
|
case 30:
|
1675
|
-
#line
|
1698
|
+
#line 261 "src/q_parser.y"
|
1676
1699
|
{ (yyval.hashset) = add_field(qp, (yyvsp[(3) - (3)].str));}
|
1677
1700
|
break;
|
1678
1701
|
|
1679
1702
|
case 31:
|
1680
|
-
#line
|
1703
|
+
#line 263 "src/q_parser.y"
|
1681
1704
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (3)].phrase), NULL); }
|
1682
1705
|
break;
|
1683
1706
|
|
1684
1707
|
case 32:
|
1685
|
-
#line
|
1708
|
+
#line 264 "src/q_parser.y"
|
1686
1709
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (5)].phrase), (yyvsp[(5) - (5)].str)); }
|
1687
1710
|
break;
|
1688
1711
|
|
1689
1712
|
case 33:
|
1690
|
-
#line
|
1713
|
+
#line 265 "src/q_parser.y"
|
1691
1714
|
{ (yyval.query) = NULL; }
|
1692
1715
|
break;
|
1693
1716
|
|
1694
1717
|
case 34:
|
1695
|
-
#line
|
1718
|
+
#line 266 "src/q_parser.y"
|
1696
1719
|
{ (yyval.query) = NULL; (void)(yyvsp[(4) - (4)].str);}
|
1697
1720
|
break;
|
1698
1721
|
|
1699
1722
|
case 35:
|
1700
|
-
#line
|
1723
|
+
#line 268 "src/q_parser.y"
|
1701
1724
|
{ (yyval.phrase) = ph_first_word((yyvsp[(1) - (1)].str)); }
|
1702
1725
|
break;
|
1703
1726
|
|
1704
1727
|
case 36:
|
1705
|
-
#line
|
1728
|
+
#line 269 "src/q_parser.y"
|
1706
1729
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1707
1730
|
break;
|
1708
1731
|
|
1709
1732
|
case 37:
|
1710
|
-
#line
|
1733
|
+
#line 270 "src/q_parser.y"
|
1711
1734
|
{ (yyval.phrase) = ph_add_word((yyvsp[(1) - (2)].phrase), (yyvsp[(2) - (2)].str)); }
|
1712
1735
|
break;
|
1713
1736
|
|
1714
1737
|
case 38:
|
1715
|
-
#line
|
1738
|
+
#line 271 "src/q_parser.y"
|
1716
1739
|
{ (yyval.phrase) = ph_add_word((yyvsp[(1) - (3)].phrase), NULL); }
|
1717
1740
|
break;
|
1718
1741
|
|
1719
1742
|
case 39:
|
1720
|
-
#line
|
1743
|
+
#line 272 "src/q_parser.y"
|
1721
1744
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[(1) - (3)].phrase), (yyvsp[(3) - (3)].str)); }
|
1722
1745
|
break;
|
1723
1746
|
|
1724
1747
|
case 40:
|
1725
|
-
#line
|
1748
|
+
#line 274 "src/q_parser.y"
|
1726
1749
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), true, true)); Y}
|
1727
1750
|
break;
|
1728
1751
|
|
1729
1752
|
case 41:
|
1730
|
-
#line
|
1753
|
+
#line 275 "src/q_parser.y"
|
1731
1754
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), true, false)); Y}
|
1732
1755
|
break;
|
1733
1756
|
|
1734
1757
|
case 42:
|
1735
|
-
#line
|
1758
|
+
#line 276 "src/q_parser.y"
|
1736
1759
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), false, true)); Y}
|
1737
1760
|
break;
|
1738
1761
|
|
1739
1762
|
case 43:
|
1740
|
-
#line
|
1763
|
+
#line 277 "src/q_parser.y"
|
1741
1764
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), false, false)); Y}
|
1742
1765
|
break;
|
1743
1766
|
|
1744
1767
|
case 44:
|
1745
|
-
#line
|
1768
|
+
#line 278 "src/q_parser.y"
|
1746
1769
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str), false, false)); Y}
|
1747
1770
|
break;
|
1748
1771
|
|
1749
1772
|
case 45:
|
1750
|
-
#line
|
1773
|
+
#line 279 "src/q_parser.y"
|
1751
1774
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str), false, true)); Y}
|
1752
1775
|
break;
|
1753
1776
|
|
1754
1777
|
case 46:
|
1755
|
-
#line
|
1778
|
+
#line 280 "src/q_parser.y"
|
1756
1779
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str), NULL,true, false)); Y}
|
1757
1780
|
break;
|
1758
1781
|
|
1759
1782
|
case 47:
|
1760
|
-
#line
|
1783
|
+
#line 281 "src/q_parser.y"
|
1761
1784
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str), NULL,false, false)); Y}
|
1762
1785
|
break;
|
1763
1786
|
|
1764
1787
|
case 48:
|
1765
|
-
#line
|
1788
|
+
#line 282 "src/q_parser.y"
|
1766
1789
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (2)].str), false, false)); Y}
|
1767
1790
|
break;
|
1768
1791
|
|
1769
1792
|
case 49:
|
1770
|
-
#line
|
1793
|
+
#line 283 "src/q_parser.y"
|
1771
1794
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(3) - (3)].str), false, true)); Y}
|
1772
1795
|
break;
|
1773
1796
|
|
1774
1797
|
case 50:
|
1775
|
-
#line
|
1798
|
+
#line 284 "src/q_parser.y"
|
1776
1799
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(3) - (3)].str), NULL,true, false)); Y}
|
1777
1800
|
break;
|
1778
1801
|
|
1779
1802
|
case 51:
|
1780
|
-
#line
|
1803
|
+
#line 285 "src/q_parser.y"
|
1781
1804
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (2)].str), NULL,false, false)); Y}
|
1782
1805
|
break;
|
1783
1806
|
|
1784
1807
|
|
1785
1808
|
/* Line 1267 of yacc.c. */
|
1786
|
-
#line
|
1809
|
+
#line 1810 "src/q_parser.c"
|
1787
1810
|
default: break;
|
1788
1811
|
}
|
1789
1812
|
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
|
@@ -1997,12 +2020,22 @@ yyreturn:
|
|
1997
2020
|
}
|
1998
2021
|
|
1999
2022
|
|
2000
|
-
#line
|
2023
|
+
#line 287 "src/q_parser.y"
|
2001
2024
|
|
2002
2025
|
|
2003
|
-
const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
|
2004
|
-
const char *not_word = " \t()[]{}!\"~^|<>=";
|
2026
|
+
static const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
|
2027
|
+
static const char *not_word = " \t()[]{}!\"~^|<>=";
|
2005
2028
|
|
2029
|
+
/**
|
2030
|
+
* +get_word+ gets the next query-word from the query string. A query-word is
|
2031
|
+
* basically a string of non-special or escaped special characters. It is
|
2032
|
+
* Analyzer agnostic. It is up to the get_*_q methods to tokenize the word and
|
2033
|
+
* turn it into a +Query+. See the documentation for each get_*_q method to
|
2034
|
+
* see how it handles tokenization.
|
2035
|
+
*
|
2036
|
+
* Note that +get_word+ is also responsible for returning field names and
|
2037
|
+
* matching the special tokens 'AND', 'NOT', 'REQ' and 'OR'.
|
2038
|
+
*/
|
2006
2039
|
static int get_word(YYSTYPE *lvalp, QParser *qp)
|
2007
2040
|
{
|
2008
2041
|
bool is_wild = false;
|
@@ -2046,8 +2079,10 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
2046
2079
|
default:
|
2047
2080
|
*bufp++ = c;
|
2048
2081
|
}
|
2049
|
-
/* we've exceeded the static buffer. switch to the dynamic
|
2050
|
-
|
2082
|
+
/* we've exceeded the static buffer. switch to the dynamic one. The
|
2083
|
+
* dynamic buffer is allocated enough space to hold the whole query
|
2084
|
+
* string so its capacity doesn't need to be checked again once
|
2085
|
+
* allocated. */
|
2051
2086
|
if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
|
2052
2087
|
qp->dynbuf = ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
|
2053
2088
|
strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
|
@@ -2057,8 +2092,8 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
2057
2092
|
}
|
2058
2093
|
get_word_done:
|
2059
2094
|
qp->qstrp--;
|
2060
|
-
/* check for keywords. There are only four so we have a bit of a hack
|
2061
|
-
* just checks for all of them. */
|
2095
|
+
/* check for keywords. There are only four so we have a bit of a hack
|
2096
|
+
* which just checks for all of them. */
|
2062
2097
|
*bufp = '\0';
|
2063
2098
|
len = (int)(bufp - buf);
|
2064
2099
|
if (qp->use_keywords) {
|
@@ -2078,6 +2113,33 @@ get_word_done:
|
|
2078
2113
|
return QWRD;
|
2079
2114
|
}
|
2080
2115
|
|
2116
|
+
/**
|
2117
|
+
* +yylex+ is the lexing method called by the QueryParser. It breaks the
|
2118
|
+
* query up into special characters;
|
2119
|
+
*
|
2120
|
+
* ( "&:()[]{}!\"~^|<>=*?+-" )
|
2121
|
+
*
|
2122
|
+
* and tokens;
|
2123
|
+
*
|
2124
|
+
* - QWRD
|
2125
|
+
* - WILD_STR
|
2126
|
+
* - AND['AND', '&&']
|
2127
|
+
* - OR['OR', '||']
|
2128
|
+
* - REQ['REQ', '+']
|
2129
|
+
* - NOT['NOT', '-', '~']
|
2130
|
+
*
|
2131
|
+
* QWRD tokens are query word tokens which are made up of characters other
|
2132
|
+
* than the special characters. They can also contain special characters when
|
2133
|
+
* escaped with a backslash '\'. WILD_STR is the same as QWRD except that it
|
2134
|
+
* may also contain '?' and '*' characters.
|
2135
|
+
*
|
2136
|
+
* If any of the special chars are seen they will usually be returned straight
|
2137
|
+
* away. The exceptions are the wild chars '*' and '?', and '&' which will be
|
2138
|
+
* treated as a plain old word character unless followed by another '&'.
|
2139
|
+
*
|
2140
|
+
* If no special characters or tokens are found then yylex delegates to
|
2141
|
+
* +get_word+ which will fetch the next query-word.
|
2142
|
+
*/
|
2081
2143
|
static int yylex(YYSTYPE *lvalp, QParser *qp)
|
2082
2144
|
{
|
2083
2145
|
char c, nc;
|
@@ -2116,6 +2178,11 @@ static int yylex(YYSTYPE *lvalp, QParser *qp)
|
|
2116
2178
|
return get_word(lvalp, qp);
|
2117
2179
|
}
|
2118
2180
|
|
2181
|
+
/**
|
2182
|
+
* yyerror gets called if there is a parse error with the yacc parser.
|
2183
|
+
* It is responsible for clearing any memory that was allocated during the
|
2184
|
+
* parsing process.
|
2185
|
+
*/
|
2119
2186
|
static int yyerror(QParser *qp, char const *msg)
|
2120
2187
|
{
|
2121
2188
|
qp->destruct = true;
|
@@ -2131,19 +2198,30 @@ static int yyerror(QParser *qp, char const *msg)
|
|
2131
2198
|
"couldn't parse query ``%s''. Error message "
|
2132
2199
|
" was %s", buf, (char *)msg);
|
2133
2200
|
}
|
2201
|
+
while (qp->fields_top->next != NULL) {
|
2202
|
+
qp_pop_fields(qp);
|
2203
|
+
}
|
2134
2204
|
return 0;
|
2135
2205
|
}
|
2136
2206
|
|
2137
2207
|
#define BQ(query) ((BooleanQuery *)(query))
|
2138
2208
|
|
2139
|
-
|
2209
|
+
/**
|
2210
|
+
* The QueryParser caches a tokenizer for each field so that it doesn't need
|
2211
|
+
* to allocate a new tokenizer for each term in the query. This would be quite
|
2212
|
+
* expensive as tokenizers use quite a large hunk of memory.
|
2213
|
+
*
|
2214
|
+
* This method returns the cached tokenizer for a particular field and sets it up
|
2215
|
+
* with the text to be tokenized.
|
2216
|
+
*/
|
2217
|
+
static TokenStream *get_cached_ts(QParser *qp, Symbol field, char *text)
|
2140
2218
|
{
|
2141
2219
|
TokenStream *ts;
|
2142
|
-
if (
|
2143
|
-
ts = h_get(qp->ts_cache, field);
|
2220
|
+
if (hs_exists(qp->tokenized_fields, field)) {
|
2221
|
+
ts = (TokenStream *)h_get(qp->ts_cache, field);
|
2144
2222
|
if (!ts) {
|
2145
2223
|
ts = a_get_ts(qp->analyzer, field, text);
|
2146
|
-
h_set(qp->ts_cache,
|
2224
|
+
h_set(qp->ts_cache, field, ts);
|
2147
2225
|
}
|
2148
2226
|
else {
|
2149
2227
|
ts->reset(ts, text);
|
@@ -2156,16 +2234,11 @@ static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
|
|
2156
2234
|
return ts;
|
2157
2235
|
}
|
2158
2236
|
|
2159
|
-
|
2160
|
-
|
2161
|
-
|
2162
|
-
|
2163
|
-
|
2164
|
-
h_set(field_cache, cached_field, cached_field);
|
2165
|
-
}
|
2166
|
-
return cached_field;
|
2167
|
-
}
|
2168
|
-
|
2237
|
+
/**
|
2238
|
+
* Turns a BooleanClause array into a BooleanQuery. It will optimize the query
|
2239
|
+
* if 0 or 1 clauses are present to NULL or the actual query in the clause
|
2240
|
+
* respectively.
|
2241
|
+
*/
|
2169
2242
|
static Query *get_bool_q(BCArray *bca)
|
2170
2243
|
{
|
2171
2244
|
Query *q;
|
@@ -2201,6 +2274,10 @@ static Query *get_bool_q(BCArray *bca)
|
|
2201
2274
|
return q;
|
2202
2275
|
}
|
2203
2276
|
|
2277
|
+
/**
|
2278
|
+
* Base method for appending BooleanClauses to a BooleanClause array. This
|
2279
|
+
* method doesn't care about the type of clause (MUST, SHOULD, MUST_NOT).
|
2280
|
+
*/
|
2204
2281
|
static void bca_add_clause(BCArray *bca, BooleanClause *clause)
|
2205
2282
|
{
|
2206
2283
|
if (bca->size >= bca->capa) {
|
@@ -2211,6 +2288,10 @@ static void bca_add_clause(BCArray *bca, BooleanClause *clause)
|
|
2211
2288
|
bca->size++;
|
2212
2289
|
}
|
2213
2290
|
|
2291
|
+
/**
|
2292
|
+
* Add the first clause to a BooleanClause array. This method is also
|
2293
|
+
* responsible for allocating a new BooleanClause array.
|
2294
|
+
*/
|
2214
2295
|
static BCArray *first_cls(BooleanClause *clause)
|
2215
2296
|
{
|
2216
2297
|
BCArray *bca = ALLOC_AND_ZERO(BCArray);
|
@@ -2222,6 +2303,12 @@ static BCArray *first_cls(BooleanClause *clause)
|
|
2222
2303
|
return bca;
|
2223
2304
|
}
|
2224
2305
|
|
2306
|
+
/**
|
2307
|
+
* Add AND clause to the BooleanClause array. This means that it will set the
|
2308
|
+
* clause being added and the previously added clause from SHOULD clauses to
|
2309
|
+
* MUST clauses. (If they are currently MUST_NOT clauses they stay as they
|
2310
|
+
* are.)
|
2311
|
+
*/
|
2225
2312
|
static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
|
2226
2313
|
{
|
2227
2314
|
if (clause) {
|
@@ -2238,6 +2325,9 @@ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
|
|
2238
2325
|
return bca;
|
2239
2326
|
}
|
2240
2327
|
|
2328
|
+
/**
|
2329
|
+
* Add SHOULD clause to the BooleanClause array.
|
2330
|
+
*/
|
2241
2331
|
static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
|
2242
2332
|
{
|
2243
2333
|
if (clause) {
|
@@ -2246,6 +2336,10 @@ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
|
|
2246
2336
|
return bca;
|
2247
2337
|
}
|
2248
2338
|
|
2339
|
+
/**
|
2340
|
+
* Add AND or OR clause to the BooleanClause array, depending on the default
|
2341
|
+
* clause type.
|
2342
|
+
*/
|
2249
2343
|
static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
2250
2344
|
BooleanClause *clause)
|
2251
2345
|
{
|
@@ -2258,6 +2352,9 @@ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
|
|
2258
2352
|
return bca;
|
2259
2353
|
}
|
2260
2354
|
|
2355
|
+
/**
|
2356
|
+
* destroy array of BooleanClauses
|
2357
|
+
*/
|
2261
2358
|
static void bca_destroy(BCArray *bca)
|
2262
2359
|
{
|
2263
2360
|
int i;
|
@@ -2268,7 +2365,10 @@ static void bca_destroy(BCArray *bca)
|
|
2268
2365
|
free(bca);
|
2269
2366
|
}
|
2270
2367
|
|
2271
|
-
|
2368
|
+
/**
|
2369
|
+
* Turn a query into a BooleanClause for addition to a BooleanQuery.
|
2370
|
+
*/
|
2371
|
+
static BooleanClause *get_bool_cls(Query *q, BCType occur)
|
2272
2372
|
{
|
2273
2373
|
if (q) {
|
2274
2374
|
return bc_new(q, occur);
|
@@ -2278,7 +2378,15 @@ static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
|
|
2278
2378
|
}
|
2279
2379
|
}
|
2280
2380
|
|
2281
|
-
|
2381
|
+
/**
|
2382
|
+
* Create a TermQuery. The word will be tokenized and if the tokenization
|
2383
|
+
* produces more than one token, a PhraseQuery will be returned. For example,
|
2384
|
+
* if the word is dbalmain@gmail.com and a LetterTokenizer is used then a
|
2385
|
+
* PhraseQuery "dbalmain gmail com" will be returned which is actually exactly
|
2386
|
+
* what we want as it will match any documents containing the same email
|
2387
|
+
* address and tokenized with the same tokenizer.
|
2388
|
+
*/
|
2389
|
+
static Query *get_term_q(QParser *qp, Symbol field, char *word)
|
2282
2390
|
{
|
2283
2391
|
Query *q;
|
2284
2392
|
Token *token;
|
@@ -2311,7 +2419,13 @@ static Query *get_term_q(QParser *qp, char *field, char *word)
|
|
2311
2419
|
return q;
|
2312
2420
|
}
|
2313
2421
|
|
2314
|
-
|
2422
|
+
/**
|
2423
|
+
* Create a FuzzyQuery. The word will be tokenized and only the first token
|
2424
|
+
* will be used. If there are any more tokens after tokenization, they will be
|
2425
|
+
* ignored.
|
2426
|
+
*/
|
2427
|
+
static Query *get_fuzzy_q(QParser *qp, Symbol field, char *word,
|
2428
|
+
char *slop_str)
|
2315
2429
|
{
|
2316
2430
|
Query *q;
|
2317
2431
|
Token *token;
|
@@ -2332,6 +2446,10 @@ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
|
|
2332
2446
|
return q;
|
2333
2447
|
}
|
2334
2448
|
|
2449
|
+
/**
|
2450
|
+
* Downcase a string taking locale into account and works for multibyte
|
2451
|
+
* character sets.
|
2452
|
+
*/
|
2335
2453
|
static char *lower_str(char *str)
|
2336
2454
|
{
|
2337
2455
|
const int max_len = (int)strlen(str) + 1;
|
@@ -2357,7 +2475,16 @@ static char *lower_str(char *str)
|
|
2357
2475
|
return str;
|
2358
2476
|
}
|
2359
2477
|
|
2360
|
-
|
2478
|
+
/**
|
2479
|
+
* Create a WildCardQuery. No tokenization will be performed on the pattern
|
2480
|
+
* but the pattern will be downcased if +qp->wild_lower+ is set to true and
|
2481
|
+
* the field in question is a tokenized field.
|
2482
|
+
*
|
2483
|
+
* Note: this method will not always return a WildCardQuery. It could be
|
2484
|
+
* optimized to a MatchAllQuery if the pattern is '*' or a PrefixQuery if the
|
2485
|
+
* only wild char (*, ?) in the pattern is a '*' at the end of the pattern.
|
2486
|
+
*/
|
2487
|
+
static Query *get_wild_q(QParser *qp, Symbol field, char *pattern)
|
2361
2488
|
{
|
2362
2489
|
Query *q;
|
2363
2490
|
bool is_prefix = false;
|
@@ -2398,22 +2525,32 @@ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
|
|
2398
2525
|
return q;
|
2399
2526
|
}
|
2400
2527
|
|
2401
|
-
|
2528
|
+
/**
|
2529
|
+
* Adds another field to the top of the FieldStack.
|
2530
|
+
*/
|
2531
|
+
static HashSet *add_field(QParser *qp, const char *field_name)
|
2402
2532
|
{
|
2533
|
+
Symbol field = intern(field_name);
|
2403
2534
|
if (qp->allow_any_fields || hs_exists(qp->all_fields, field)) {
|
2404
|
-
hs_add(qp->fields,
|
2535
|
+
hs_add(qp->fields, field);
|
2405
2536
|
}
|
2406
2537
|
return qp->fields;
|
2407
2538
|
}
|
2408
2539
|
|
2409
|
-
|
2540
|
+
/**
|
2541
|
+
* The method gets called when a field modifier ("field1|field2:") is seen. It
|
2542
|
+
* will push a new FieldStack object onto the stack and add +field+ to its
|
2543
|
+
* fields set.
|
2544
|
+
*/
|
2545
|
+
static HashSet *first_field(QParser *qp, const char *field)
|
2410
2546
|
{
|
2411
|
-
qp
|
2412
|
-
qp->fields->size = 0;
|
2413
|
-
h_clear(qp->fields->ht);
|
2547
|
+
qp_push_fields(qp, hs_new_ptr(NULL), true);
|
2414
2548
|
return add_field(qp, field);
|
2415
2549
|
}
|
2416
2550
|
|
2551
|
+
/**
|
2552
|
+
* Destroy a phrase object freeing all allocated memory.
|
2553
|
+
*/
|
2417
2554
|
static void ph_destroy(Phrase *self)
|
2418
2555
|
{
|
2419
2556
|
int i;
|
@@ -2425,6 +2562,9 @@ static void ph_destroy(Phrase *self)
|
|
2425
2562
|
}
|
2426
2563
|
|
2427
2564
|
|
2565
|
+
/**
|
2566
|
+
* Allocate a new Phrase object
|
2567
|
+
*/
|
2428
2568
|
static Phrase *ph_new()
|
2429
2569
|
{
|
2430
2570
|
Phrase *self = ALLOC_AND_ZERO(Phrase);
|
@@ -2433,6 +2573,10 @@ static Phrase *ph_new()
|
|
2433
2573
|
return self;
|
2434
2574
|
}
|
2435
2575
|
|
2576
|
+
/**
|
2577
|
+
* Add the first word to the phrase. This method is also in charge of
|
2578
|
+
* allocating a new Phrase object.
|
2579
|
+
*/
|
2436
2580
|
static Phrase *ph_first_word(char *word)
|
2437
2581
|
{
|
2438
2582
|
Phrase *self = ph_new();
|
@@ -2444,6 +2588,9 @@ static Phrase *ph_first_word(char *word)
|
|
2444
2588
|
return self;
|
2445
2589
|
}
|
2446
2590
|
|
2591
|
+
/**
|
2592
|
+
* Add a new word to the Phrase
|
2593
|
+
*/
|
2447
2594
|
static Phrase *ph_add_word(Phrase *self, char *word)
|
2448
2595
|
{
|
2449
2596
|
if (word) {
|
@@ -2466,6 +2613,10 @@ static Phrase *ph_add_word(Phrase *self, char *word)
|
|
2466
2613
|
return self;
|
2467
2614
|
}
|
2468
2615
|
|
2616
|
+
/**
|
2617
|
+
* Adds a word to the Phrase object in the same position as the previous word
|
2618
|
+
* added to the Phrase. This will later be turned into a multi-PhraseQuery.
|
2619
|
+
*/
|
2469
2620
|
static Phrase *ph_add_multi_word(Phrase *self, char *word)
|
2470
2621
|
{
|
2471
2622
|
const int index = self->size - 1;
|
@@ -2477,7 +2628,35 @@ static Phrase *ph_add_multi_word(Phrase *self, char *word)
|
|
2477
2628
|
return self;
|
2478
2629
|
}
|
2479
2630
|
|
2480
|
-
|
2631
|
+
/**
|
2632
|
+
* Build a phrase query for a single field. It might seem like a better idea
|
2633
|
+
* to build the PhraseQuery once and duplicate it for each field but this
|
2634
|
+
* would be buggy in the case of PerFieldAnalyzers in which case a different
|
2635
|
+
* tokenizer could be used for each field.
|
2636
|
+
*
|
2637
|
+
* Note that the query object returned by this method is not always a
|
2638
|
+
* PhraseQuery. If there is only one term in the query then the query is
|
2639
|
+
* simplified to a TermQuery. If there are multiple terms but only a single
|
2640
|
+
* position, then a MultiTermQuery is returned.
|
2641
|
+
*
|
2642
|
+
* Note that each word in the query gets tokenized. Unlike get_term_q, if the
|
2643
|
+
* word gets tokenized into more than one token, the rest of the tokens are
|
2644
|
+
* ignored. For example, if you have the phrase;
|
2645
|
+
*
|
2646
|
+
* "email: dbalmain@gmail.com"
|
2647
|
+
*
|
2648
|
+
* the Phrase object will contain two positions with the words 'email:' and
|
2649
|
+
* 'dbalmain@gmail.com'. Now, if you are using a LetterTokenizer then the
|
2650
|
+
* second word will be tokenized into the tokens ['dbalmain', 'gmail', 'com']
|
2651
|
+
* and only the first token will be used, so the resulting phrase query will
|
2652
|
+
* actually look like this;
|
2653
|
+
*
|
2654
|
+
* "email dbalmain"
|
2655
|
+
*
|
2656
|
+
* This problem can easily be solved by using the StandardTokenizer or any
|
2657
|
+
* custom tokenizer which will leave dbalmain@gmail.com as a single token.
|
2658
|
+
*/
|
2659
|
+
static Query *get_phrase_query(QParser *qp, Symbol field,
|
2481
2660
|
Phrase *phrase, char *slop_str)
|
2482
2661
|
{
|
2483
2662
|
const int pos_cnt = phrase->size;
|
@@ -2497,13 +2676,14 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2497
2676
|
|
2498
2677
|
for (i = 0; i < word_count; i++) {
|
2499
2678
|
token = ts_next(get_cached_ts(qp, field, words[i]));
|
2500
|
-
free(words[i]);
|
2501
2679
|
if (token) {
|
2680
|
+
free(words[i]);
|
2502
2681
|
last_word = words[i] = estrdup(token->text);
|
2503
2682
|
++term_cnt;
|
2504
2683
|
}
|
2505
2684
|
else {
|
2506
|
-
words
|
2685
|
+
/* empty words will later be ignored */
|
2686
|
+
words[i][0] = '\0';
|
2507
2687
|
}
|
2508
2688
|
}
|
2509
2689
|
|
@@ -2517,6 +2697,7 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2517
2697
|
default:
|
2518
2698
|
q = multi_tq_new_conf(field, term_cnt, 0.0);
|
2519
2699
|
for (i = 0; i < word_count; i++) {
|
2700
|
+
/* ignore empty words */
|
2520
2701
|
if (words[i][0]) {
|
2521
2702
|
multi_tq_add_term(q, words[i]);
|
2522
2703
|
}
|
@@ -2582,19 +2763,31 @@ static Query *get_phrase_query(QParser *qp, char *field,
|
|
2582
2763
|
return q;
|
2583
2764
|
}
|
2584
2765
|
|
2766
|
+
/**
|
2767
|
+
* Get a phrase query from the Phrase object. The Phrase object is built up by
|
2768
|
+
* the query parser as the actual PhraseQuery didn't work well for this. Once the
|
2769
|
+
* PhraseQuery has been built the Phrase object needs to be destroyed.
|
2770
|
+
*/
|
2585
2771
|
static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
|
2586
2772
|
{
|
2587
|
-
Query *q = NULL;
|
2773
|
+
Query *volatile q = NULL;
|
2588
2774
|
FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
|
2589
2775
|
ph_destroy(phrase);
|
2590
2776
|
return q;
|
2591
2777
|
}
|
2592
2778
|
|
2593
|
-
|
2779
|
+
/**
|
2780
|
+
* Gets a RangeQuery object.
|
2781
|
+
*
|
2782
|
+
* Just like with WildCardQuery, RangeQuery needs to downcase its terms if the
|
2783
|
+
* tokenizer also downcased its terms.
|
2784
|
+
*/
|
2785
|
+
static Query *get_r_q(QParser *qp, Symbol field, char *from, char *to,
|
2594
2786
|
bool inc_lower, bool inc_upper)
|
2595
2787
|
{
|
2596
2788
|
Query *rq;
|
2597
|
-
if (qp->wild_lower
|
2789
|
+
if (qp->wild_lower
|
2790
|
+
&& (!qp->tokenized_fields || hs_exists(qp->tokenized_fields, field))) {
|
2598
2791
|
if (from) {
|
2599
2792
|
lower_str(from);
|
2600
2793
|
}
|
@@ -2603,6 +2796,9 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
|
|
2603
2796
|
}
|
2604
2797
|
}
|
2605
2798
|
/*
|
2799
|
+
* terms don't get tokenized as it doesn't really make sense to do so for
|
2800
|
+
* range queries.
|
2801
|
+
|
2606
2802
|
if (from) {
|
2607
2803
|
TokenStream *stream = get_cached_ts(qp, field, from);
|
2608
2804
|
Token *token = ts_next(stream);
|
@@ -2615,34 +2811,81 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
|
|
2615
2811
|
}
|
2616
2812
|
*/
|
2617
2813
|
|
2618
|
-
rq =
|
2814
|
+
rq = qp->use_typed_range_query ?
|
2815
|
+
trq_new(field, from, to, inc_lower, inc_upper) :
|
2816
|
+
rq_new(field, from, to, inc_lower, inc_upper);
|
2619
2817
|
return rq;
|
2620
2818
|
}
|
2621
2819
|
|
2622
|
-
|
2820
|
+
/**
|
2821
|
+
* Every time the query parser sees a new field modifier ("field1|field2:")
|
2822
|
+
* it pushes a new FieldStack object onto the stack and sets its fields to the
|
2823
|
+
* fields specified in the fields modifier. If the field modifier is '*',
|
2824
|
+
* fs->fields is set to all_fields. fs->fields is set to +qp->def_field+ at
|
2825
|
+
* the bottom of the stack (ie the very first set of fields pushed onto the
|
2826
|
+
* stack).
|
2827
|
+
*/
|
2828
|
+
static void qp_push_fields(QParser *self, HashSet *fields, bool destroy)
|
2623
2829
|
{
|
2624
|
-
|
2625
|
-
|
2830
|
+
FieldStack *fs = ALLOC(FieldStack);
|
2831
|
+
|
2832
|
+
fs->next = self->fields_top;
|
2833
|
+
fs->fields = fields;
|
2834
|
+
fs->destroy = destroy;
|
2835
|
+
|
2836
|
+
self->fields_top = fs;
|
2837
|
+
self->fields = fields;
|
2838
|
+
}
|
2839
|
+
|
2840
|
+
/**
|
2841
|
+
* Pops the top of the fields stack and frees any memory used by it. This will
|
2842
|
+
* get called when a query modified by a field modifier ("field1|field2:") has
|
2843
|
+
* been fully parsed and the field specifier no longer applies.
|
2844
|
+
*/
|
2845
|
+
static void qp_pop_fields(QParser *self)
|
2846
|
+
{
|
2847
|
+
FieldStack *fs = self->fields_top;
|
2848
|
+
|
2849
|
+
if (fs->destroy) {
|
2850
|
+
hs_destroy(fs->fields);
|
2851
|
+
}
|
2852
|
+
self->fields_top = fs->next;
|
2853
|
+
if (self->fields_top) {
|
2854
|
+
self->fields = self->fields_top->fields;
|
2626
2855
|
}
|
2627
|
-
|
2856
|
+
free(fs);
|
2857
|
+
}
|
2858
|
+
|
2859
|
+
/**
|
2860
|
+
* Free all memory allocated by the QueryParser.
|
2861
|
+
*/
|
2862
|
+
void qp_destroy(QParser *self)
|
2863
|
+
{
|
2864
|
+
if (self->tokenized_fields != self->all_fields) {
|
2628
2865
|
hs_destroy(self->tokenized_fields);
|
2629
2866
|
}
|
2630
|
-
if (self->
|
2631
|
-
|
2867
|
+
if (self->def_fields != self->all_fields) {
|
2868
|
+
hs_destroy(self->def_fields);
|
2632
2869
|
}
|
2633
2870
|
hs_destroy(self->all_fields);
|
2634
|
-
|
2635
|
-
|
2871
|
+
|
2872
|
+
qp_pop_fields(self);
|
2873
|
+
assert(NULL == self->fields_top);
|
2874
|
+
|
2636
2875
|
h_destroy(self->ts_cache);
|
2637
2876
|
tk_destroy(self->non_tokenizer);
|
2638
2877
|
a_deref(self->analyzer);
|
2639
2878
|
free(self);
|
2640
2879
|
}
|
2641
2880
|
|
2642
|
-
|
2643
|
-
|
2881
|
+
/**
|
2882
|
+
* Creates a new QueryParser setting all boolean parameters to their defaults.
|
2883
|
+
* If +def_fields+ is NULL then +all_fields+ is used in place of +def_fields+.
|
2884
|
+
* Note also that this method ensures that all fields that exist in
|
2885
|
+
* +def_fields+ must also exist in +all_fields+. This should make sense.
|
2886
|
+
*/
|
2887
|
+
QParser *qp_new(Analyzer *analyzer)
|
2644
2888
|
{
|
2645
|
-
int i;
|
2646
2889
|
QParser *self = ALLOC(QParser);
|
2647
2890
|
self->or_default = true;
|
2648
2891
|
self->wild_lower = true;
|
@@ -2651,48 +2894,66 @@ QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
|
2651
2894
|
self->handle_parse_errors = false;
|
2652
2895
|
self->allow_any_fields = false;
|
2653
2896
|
self->use_keywords = true;
|
2897
|
+
self->use_typed_range_query = false;
|
2654
2898
|
self->def_slop = 0;
|
2655
|
-
|
2656
|
-
self->
|
2657
|
-
self->
|
2658
|
-
|
2659
|
-
|
2660
|
-
|
2661
|
-
|
2662
|
-
|
2663
|
-
}
|
2664
|
-
}
|
2665
|
-
self->close_def_fields = true;
|
2666
|
-
}
|
2667
|
-
else {
|
2668
|
-
self->def_fields = all_fields;
|
2669
|
-
self->close_def_fields = false;
|
2670
|
-
}
|
2671
|
-
self->field_cache = h_new_str((free_ft)NULL, &free);
|
2672
|
-
for (i = 0; i < self->all_fields->size; i++) {
|
2673
|
-
char *field = estrdup(self->all_fields->elems[i]);
|
2674
|
-
h_set(self->field_cache, field, field);
|
2675
|
-
}
|
2676
|
-
self->fields = self->def_fields;
|
2899
|
+
|
2900
|
+
self->tokenized_fields = hs_new_ptr(NULL);
|
2901
|
+
self->all_fields = hs_new_ptr(NULL);
|
2902
|
+
self->def_fields = hs_new_ptr(NULL);
|
2903
|
+
|
2904
|
+
self->fields_top = NULL;
|
2905
|
+
qp_push_fields(self, self->def_fields, false);
|
2906
|
+
|
2677
2907
|
/* make sure all_fields contains the default fields */
|
2678
2908
|
self->analyzer = analyzer;
|
2679
|
-
self->ts_cache =
|
2909
|
+
self->ts_cache = h_new_ptr((free_ft)&ts_deref);
|
2680
2910
|
self->buf_index = 0;
|
2681
|
-
self->dynbuf =
|
2911
|
+
self->dynbuf = NULL;
|
2682
2912
|
self->non_tokenizer = non_tokenizer_new();
|
2683
2913
|
mutex_init(&self->mutex, NULL);
|
2684
2914
|
return self;
|
2685
2915
|
}
|
2686
2916
|
|
2917
|
+
void qp_add_field(QParser *self,
|
2918
|
+
Symbol field,
|
2919
|
+
bool is_default,
|
2920
|
+
bool is_tokenized)
|
2921
|
+
{
|
2922
|
+
hs_add(self->all_fields, field);
|
2923
|
+
if (is_default) {
|
2924
|
+
hs_add(self->def_fields, field);
|
2925
|
+
}
|
2926
|
+
if (is_tokenized) {
|
2927
|
+
hs_add(self->tokenized_fields, field);
|
2928
|
+
}
|
2929
|
+
}
|
2930
|
+
|
2687
2931
|
/* these chars have meaning within phrases */
|
2688
2932
|
static const char *PHRASE_CHARS = "<>|\"";
|
2689
2933
|
|
2690
|
-
|
2934
|
+
/**
|
2935
|
+
* +str_insert_char+ inserts a character at the beginning of a string by
|
2936
|
+
* shifting the rest of the string right.
|
2937
|
+
*/
|
2938
|
+
static void str_insert_char(char *str, int len, char chr)
|
2691
2939
|
{
|
2692
2940
|
memmove(str+1, str, len*sizeof(char));
|
2693
2941
|
*str = chr;
|
2694
2942
|
}
|
2695
2943
|
|
2944
|
+
/**
|
2945
|
+
* +qp_clean_str+ basically scans the query string and ensures that all open
|
2946
|
+
* and close parentheses '()' and quotes '"' are balanced. It does this by
|
2947
|
+
* inserting or appending extra parentheses or quotes to the string. This
|
2948
|
+
* obviously won't necessarily be exactly what the user wanted but we are
|
2949
|
+
* never going to know that anyway. The main job of this method is to help the
|
2950
|
+
* query at least parse correctly.
|
2951
|
+
*
|
2952
|
+
* It also checks that all special characters within phrases (ie between
|
2953
|
+
* quotes) are escaped correctly unless they have meaning within a phrase
|
2954
|
+
* ( <>,|," ). Note that '<' and '>' will also be escaped unless they appear
|
2955
|
+
* together like so; '<>'.
|
2956
|
+
*/
|
2696
2957
|
char *qp_clean_str(char *str)
|
2697
2958
|
{
|
2698
2959
|
int b, pb = -1;
|
@@ -2711,8 +2972,8 @@ char *qp_clean_str(char *str)
|
|
2711
2972
|
*nsp++ = '\\'; /* this was left off the first time through */
|
2712
2973
|
}
|
2713
2974
|
*nsp++ = b;
|
2714
|
-
/*
|
2715
|
-
pb = ((b == '\\') ? '
|
2975
|
+
/* \ has escaped itself so has no power. Assign pb random char 'r' */
|
2976
|
+
pb = ((b == '\\') ? 'r' : b);
|
2716
2977
|
continue;
|
2717
2978
|
}
|
2718
2979
|
switch (b) {
|
@@ -2737,7 +2998,7 @@ char *qp_clean_str(char *str)
|
|
2737
2998
|
case ')':
|
2738
2999
|
if (!quote_open) {
|
2739
3000
|
if (br_cnt == 0) {
|
2740
|
-
|
3001
|
+
str_insert_char(new_str, (int)(nsp - new_str), '(');
|
2741
3002
|
nsp++;
|
2742
3003
|
}
|
2743
3004
|
else {
|
@@ -2782,18 +3043,35 @@ char *qp_clean_str(char *str)
|
|
2782
3043
|
return new_str;
|
2783
3044
|
}
|
2784
3045
|
|
2785
|
-
|
3046
|
+
/**
|
3047
|
+
* Takes a string and finds whatever tokens it can using the QueryParser's
|
3048
|
+
* analyzer. It then turns these tokens (if any) into a boolean query. If it
|
3049
|
+
* fails to find any tokens, this method will return NULL.
|
3050
|
+
*/
|
3051
|
+
static Query *qp_get_bad_query(QParser *qp, char *str)
|
2786
3052
|
{
|
2787
3053
|
Query *volatile q = NULL;
|
2788
3054
|
qp->recovering = true;
|
3055
|
+
assert(qp->fields_top->next == NULL);
|
2789
3056
|
FLDS(q, get_term_q(qp, field, str));
|
2790
3057
|
return q;
|
2791
3058
|
}
|
2792
3059
|
|
3060
|
+
/**
|
3061
|
+
* +qp_parse+ takes a string and turns it into a Query object using Ferret's
|
3062
|
+
* query language. It must either raise an error or return a query object. It
|
3063
|
+
* must not return NULL. If the yacc parser fails it will use a very basic
|
3064
|
+
* boolean query parser which takes whatever tokens it can find in the query
|
3065
|
+
* and turns them into a boolean query on the default fields.
|
3066
|
+
*/
|
2793
3067
|
Query *qp_parse(QParser *self, char *qstr)
|
2794
3068
|
{
|
2795
3069
|
Query *result = NULL;
|
2796
3070
|
mutex_lock(&self->mutex);
|
3071
|
+
/* if qp->fields_top->next is not NULL we have a left over field-stack
|
3072
|
+
* object that was not popped during the last query parse */
|
3073
|
+
assert(NULL == self->fields_top->next);
|
3074
|
+
|
2797
3075
|
self->recovering = self->destruct = false;
|
2798
3076
|
if (self->clean_str) {
|
2799
3077
|
self->qstrp = self->qstr = qp_clean_str(qstr);
|