ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -0,0 +1,343 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /***************************************************************************
5
+ *
6
+ * MultiPhraseWeight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *mphw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("MultiPhraseWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
+ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
18
+ {
19
+ Scorer *phsc;
20
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
21
+ int i;
22
+ if (mphq->t_cnt == 0) return NULL; // optimize zero-term case
23
+
24
+ TermDocEnum **tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
25
+
26
+ for (i = 0; i < mphq->t_cnt; i++) {
27
+ if (mphq->pt_cnt[i] == 1) {
28
+ tps[i] = ir_term_positions_for(ir, mphq->terms[i][0]);
29
+ } else {
30
+ tps[i] = mtdpe_create(ir, mphq->terms[i], mphq->pt_cnt[i]);
31
+ }
32
+ if (tps[i] == NULL) {
33
+ // free everything we just created and return NULL
34
+ int j;
35
+ for (j = 0; j < i; j++) {
36
+ tps[i]->close(tps[i]);
37
+ }
38
+ free(tps);
39
+ return NULL;
40
+ }
41
+ }
42
+
43
+ if (mphq->slop == 0) { // optimize exact case
44
+ phsc = exact_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
45
+ self->similarity,
46
+ ir->get_norms(ir, mphq->field));
47
+ } else {
48
+ phsc = sloppy_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
49
+ self->similarity,
50
+ mphq->slop,
51
+ ir->get_norms(ir, mphq->field));
52
+ }
53
+ free(tps);
54
+ return phsc;
55
+ }
56
+
57
+ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
58
+ {
59
+ char *query_str = self->query->to_s(self->query, "");
60
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
61
+ int i, j;
62
+ char *doc_freqs = NULL;
63
+ int len = 0, pos = 0;
64
+
65
+ Explanation *expl = expl_create(0.0,
66
+ epstrdup("weight(%s in %d), product of:",
67
+ strlen(query_str) + 20,
68
+ query_str, doc_num));
69
+
70
+ for (i = 0; i < mphq->t_cnt; i++) {
71
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
72
+ len += strlen(mphq->terms[i][j]->text) + 30;
73
+ }
74
+ }
75
+ doc_freqs = ALLOC_N(char, len);
76
+ for (i = 0; i < mphq->t_cnt; i++) {
77
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
78
+ Term *term = mphq->terms[i][j];
79
+ sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
80
+ pos += strlen(doc_freqs + pos);
81
+ }
82
+ }
83
+ pos -= 2; // remove ", " from the end
84
+ doc_freqs[pos] = 0;
85
+
86
+ Explanation *idf_expl1 = expl_create(self->idf,
87
+ epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
88
+ Explanation *idf_expl2 = expl_create(self->idf,
89
+ epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
90
+ free(doc_freqs);
91
+
92
+ // explain query weight
93
+ Explanation *query_expl = expl_create(0.0,
94
+ epstrdup("query_weight(%s), product of:", strlen(query_str), query_str));
95
+
96
+ if (self->query->boost != 1.0) {
97
+ expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
98
+ }
99
+ expl_add_detail(query_expl, idf_expl1);
100
+
101
+ Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
102
+ expl_add_detail(query_expl, qnorm_expl);
103
+
104
+ query_expl->value = self->query->boost * self->idf * self->qnorm;
105
+
106
+ expl_add_detail(expl, query_expl);
107
+
108
+ // explain field weight
109
+ Explanation *field_expl = expl_create(0.0,
110
+ epstrdup("field_weight(%s in %d), product of:",
111
+ strlen(query_str) + 20, query_str, doc_num));
112
+ free(query_str);
113
+
114
+ Scorer *scorer = self->scorer(self, ir);
115
+ Explanation *tf_expl = scorer->explain(scorer, doc_num);
116
+ scorer->destroy(scorer);
117
+ expl_add_detail(field_expl, tf_expl);
118
+ expl_add_detail(field_expl, idf_expl2);
119
+
120
+ uchar *field_norms = ir->get_norms(ir, mphq->field);
121
+ float field_norm = (field_norms != NULL)
122
+ ? sim_decode_norm(self->similarity, field_norms[doc_num])
123
+ : 0.0;
124
+ Explanation *field_norm_expl = expl_create(field_norm,
125
+ epstrdup("field_norm(field=%s, doc=%d)",
126
+ strlen(mphq->field) + 20, mphq->field, doc_num));
127
+
128
+ expl_add_detail(field_expl, field_norm_expl);
129
+
130
+ field_expl->value = tf_expl->value * self->idf * field_norm;
131
+
132
+ // combine them
133
+ if (query_expl->value == 1.0) {
134
+ expl_destoy(expl);
135
+ return field_expl;
136
+ } else {
137
+ expl->value = (query_expl->value * field_expl->value);
138
+ expl_add_detail(expl, field_expl);
139
+ return expl;
140
+ }
141
+ }
142
+
143
+
144
+
145
+
146
+ Weight *mphw_create(Query *query, Searcher *searcher)
147
+ {
148
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)query->data;
149
+ Weight *self = ALLOC(Weight);
150
+ int i, j;
151
+ ZEROSET(self, Weight, 1);
152
+ self->get_query = &w_get_query;
153
+ self->get_value = &w_get_value;
154
+ self->normalize = &w_normalize;
155
+ self->scorer = &mphw_scorer;
156
+ self->explain = &mphw_explain;
157
+ self->to_s = &mphw_to_s;
158
+ self->destroy = &free;
159
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
160
+
161
+ self->similarity = query->get_similarity(query, searcher);
162
+ self->query = query;
163
+ self->value = query->boost;
164
+
165
+ self->idf = 0.0;
166
+ for (i = 0; i < mphq->t_cnt; i++) {
167
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
168
+ self->idf += sim_idf_term(self->similarity, mphq->terms[i][j], searcher);
169
+ }
170
+ }
171
+
172
+ return self;
173
+ }
174
+
175
+ /***************************************************************************
176
+ *
177
+ * MultiPhraseQuery
178
+ *
179
+ ***************************************************************************/
180
+
181
+ #define GET_MPHQ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->data
182
+
183
+ /**
184
+ * NOTE: terms must be allocated and it will be freed when the query is
185
+ * destroyed.
186
+ */
187
+ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
188
+ {
189
+ GET_MPHQ;
190
+ int position, index = mphq->t_cnt;
191
+ if (index >= mphq->t_capa) {
192
+ mphq->t_capa <<= 1;
193
+ REALLOC_N(mphq->terms, Term **, mphq->t_capa);
194
+ REALLOC_N(mphq->positions, int, mphq->t_capa);
195
+ REALLOC_N(mphq->pt_cnt, int, mphq->t_capa);
196
+ }
197
+ if (index == 0) {
198
+ position = 0;
199
+ mphq->field = terms[0]->field;
200
+ } else {
201
+ int i;
202
+ position = mphq->positions[index - 1] + pos_inc;
203
+ for (i = 0; i < t_cnt; i++) {
204
+ if (strcmp(terms[i]->field, mphq->field) != 0) {
205
+ eprintf(ARG_ERROR, "All phrase terms must be in the same field. Current phrase is %s, tried to add %s\n", mphq->field, terms[i]->field);
206
+ }
207
+ }
208
+ }
209
+
210
+ mphq->terms[index] = terms;
211
+ mphq->pt_cnt[index] = t_cnt;
212
+ mphq->positions[index] = position;
213
+ mphq->t_cnt++;
214
+ }
215
+
216
+ void mphq_destroy(void *p)
217
+ {
218
+ Query *self = (Query *)p;
219
+
220
+ GET_MPHQ;
221
+ int i, j;
222
+ if (self->destroy_all) {
223
+ for (i = 0; i < mphq->t_cnt; i++) {
224
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
225
+ term_destroy(mphq->terms[i][j]);
226
+ }
227
+ free(mphq->terms[i]);
228
+ }
229
+ }
230
+ free(mphq->terms);
231
+ free(mphq->positions);
232
+ free(mphq->pt_cnt);
233
+ free(mphq);
234
+
235
+ q_destroy(self);
236
+ }
237
+
238
+ void mphq_extract_terms(Query *self, Array *terms)
239
+ {
240
+ GET_MPHQ;
241
+ int i, j;
242
+ for (i = 0; i < mphq->t_cnt; i++) {
243
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
244
+ ary_append(terms, mphq->terms[i][j]);
245
+ }
246
+ }
247
+ }
248
+
249
+ char *mphq_to_s(Query *self, char *field)
250
+ {
251
+ GET_MPHQ;
252
+ int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
253
+ char *buffer;
254
+ if (!mphq->t_cnt) return NULL;
255
+ len = strlen(mphq->field) + 1;
256
+ for (i = 0; i < mphq->t_cnt; i++) {
257
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
258
+ len += strlen(mphq->terms[i][j]->text) + 1;
259
+ }
260
+ }
261
+
262
+ // add space for extra characters and boost and slop
263
+ len += 100 + 3 * mphq->positions[mphq->t_cnt - 1];
264
+
265
+ buffer = ALLOC_N(char, len);
266
+
267
+ if (strcmp(field, mphq->field) != 0) {
268
+ len = strlen(mphq->field);
269
+ memcpy(buffer, mphq->field, len);
270
+ buffer[len] = ':';
271
+ buf_index += len + 1;
272
+ }
273
+ buffer[buf_index++] = '"';
274
+
275
+ for (i = 0; i < mphq->t_cnt; i++) {
276
+ pos = mphq->positions[i];
277
+ for (j = last_pos; j < pos - 1; j++) {
278
+ memcpy(buffer + buf_index, "<> ", 3);
279
+ buf_index += 3;
280
+ }
281
+ last_pos = pos;
282
+
283
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
284
+ Term *term = mphq->terms[i][j];
285
+ len = strlen(term->text);
286
+ memcpy(buffer + buf_index, term->text, len);
287
+ buf_index += len;
288
+ buffer[buf_index++] = '|';
289
+ }
290
+ buffer[buf_index-1] = ' '; /* change last '|' to ' ' */
291
+ }
292
+
293
+ if (buffer[buf_index-1] == ' ') buf_index--;
294
+ buffer[buf_index++] = '"';
295
+ buffer[buf_index] = 0;
296
+ if (mphq->slop != 0) {
297
+ sprintf(buffer + buf_index, "~%d", mphq->slop);
298
+ buf_index += strlen(buffer + buf_index);
299
+ }
300
+ if (self->boost != 1.0) {
301
+ buffer[buf_index] = '^';
302
+ dbl_to_s(buffer + buf_index + 1, self->boost);
303
+ }
304
+ return buffer;
305
+ }
306
+
307
+ Query *mphq_rewrite(Query *self, IndexReader *ir)
308
+ {
309
+ GET_MPHQ;
310
+ if (mphq->t_cnt == 1) { // optimize one-term case
311
+ Term **terms = mphq->terms[0];
312
+ Query *bq = bq_create(true);
313
+ int i;
314
+ for (i = 0; i < mphq->pt_cnt[0]; i++) {
315
+ bq_add_query(bq, tq_create(term_clone(terms[i])), BC_SHOULD);
316
+ }
317
+ bq->boost = self->boost;
318
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
319
+ return self->rewritten = bq;
320
+ } else {
321
+ return self;
322
+ }
323
+ }
324
+
325
+ Query *mphq_create()
326
+ {
327
+ Query *self = q_create();
328
+ MultiPhraseQuery *mphq = ALLOC(MultiPhraseQuery);
329
+ ZEROSET(mphq, MultiPhraseQuery, 1);
330
+ mphq->t_capa = PHQ_INIT_CAPA;
331
+ mphq->terms = ALLOC_N(Term **, PHQ_INIT_CAPA);
332
+ mphq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
333
+ mphq->pt_cnt = ALLOC_N(int, PHQ_INIT_CAPA);
334
+ self->data = mphq;
335
+
336
+ self->create_weight = &mphw_create;
337
+ self->extract_terms = &mphq_extract_terms;
338
+ self->to_s = &mphq_to_s;
339
+ self->destroy = &mphq_destroy;
340
+ self->rewrite = &mphq_rewrite;
341
+ self->type = PHRASE_QUERY;
342
+ return self;
343
+ }
data/ext/q_parser.c ADDED
@@ -0,0 +1,2180 @@
1
+ /* A Bison parser, made by GNU Bison 2.0. */
2
+
3
+ /* Skeleton parser for Yacc-like parsing with Bison,
4
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5
+
6
+ This program is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2, or (at your option)
9
+ any later version.
10
+
11
+ This program is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this program; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place - Suite 330,
19
+ Boston, MA 02111-1307, USA. */
20
+
21
+ /* As a special exception, when this file is copied by Bison into a
22
+ Bison output file, you may use that output file without restriction.
23
+ This special exception was added by the Free Software Foundation
24
+ in version 1.24 of Bison. */
25
+
26
+ /* Written by Richard Stallman by simplifying the original so called
27
+ ``semantic'' parser. */
28
+
29
+ /* All symbols defined below should begin with yy or YY, to avoid
30
+ infringing on user name space. This should be done even for local
31
+ variables, as they might otherwise be expanded by user macros.
32
+ There are some unavoidable exceptions within include files to
33
+ define necessary library symbols; they are noted "INFRINGES ON
34
+ USER NAME SPACE" below. */
35
+
36
+ /* Identify Bison output. */
37
+ #define YYBISON 1
38
+
39
+ /* Skeleton name. */
40
+ #define YYSKELETON_NAME "yacc.c"
41
+
42
+ /* Pure parsers. */
43
+ #define YYPURE 1
44
+
45
+ /* Using locations. */
46
+ #define YYLSP_NEEDED 0
47
+
48
+
49
+
50
+ /* Tokens. */
51
+ #ifndef YYTOKENTYPE
52
+ # define YYTOKENTYPE
53
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
54
+ know about them. */
55
+ enum yytokentype {
56
+ WORD = 258,
57
+ WILD_STR = 259,
58
+ LOW = 260,
59
+ OR = 261,
60
+ AND = 262,
61
+ NOT = 263,
62
+ REQ = 264,
63
+ HIGH = 265
64
+ };
65
+ #endif
66
+ #define WORD 258
67
+ #define WILD_STR 259
68
+ #define LOW 260
69
+ #define OR 261
70
+ #define AND 262
71
+ #define NOT 263
72
+ #define REQ 264
73
+ #define HIGH 265
74
+
75
+
76
+
77
+
78
+ /* Copy the first part of user declarations. */
79
+ #line 1 "src/query_parser/q_parser.y"
80
+
81
+ #include <string.h>
82
+ #include "search.h"
83
+
84
+ typedef struct Phrase {
85
+ int cnt;
86
+ int capa;
87
+ char ***words;
88
+ int *w_cnt;
89
+ int *w_capa;
90
+ } Phrase;
91
+
92
+
93
+
94
+ /* Enabling traces. */
95
+ #ifndef YYDEBUG
96
+ # define YYDEBUG 0
97
+ #endif
98
+
99
+ /* Enabling verbose error messages. */
100
+ #ifdef YYERROR_VERBOSE
101
+ # undef YYERROR_VERBOSE
102
+ # define YYERROR_VERBOSE 1
103
+ #else
104
+ # define YYERROR_VERBOSE 0
105
+ #endif
106
+
107
+ #if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
108
+ #line 14 "src/query_parser/q_parser.y"
109
+ typedef union YYSTYPE {
110
+ Query *query;
111
+ BooleanClause *bcls;
112
+ Array *array;
113
+ HashSet *hashset;
114
+ Phrase *phrase;
115
+ char *str;
116
+ } YYSTYPE;
117
+ /* Line 190 of yacc.c. */
118
+ #line 119 "y.tab.c"
119
+ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
120
+ # define YYSTYPE_IS_DECLARED 1
121
+ # define YYSTYPE_IS_TRIVIAL 1
122
+ #endif
123
+
124
+
125
+
126
+ /* Copy the second part of user declarations. */
127
+ #line 22 "src/query_parser/q_parser.y"
128
+
129
+ int yylex(YYSTYPE *lvalp, QParser *qp);
130
+ int yyerror(QParser *qp, char const *msg);
131
+
132
+ #define PHRASE_INIT_CAPA 4
133
+ Query *get_bool_q(Array *bclauses);
134
+
135
+ Array *first_cls(BooleanClause *cls);
136
+ Array *add_and_cls(Array *clauses, BooleanClause *cls);
137
+ Array *add_or_cls(Array *clauses, BooleanClause *cls);
138
+ Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *cls);
139
+
140
+ BooleanClause *get_bool_cls(Query *q, unsigned int occur);
141
+
142
+ Query *get_term_q(QParser *qp, char *field, char *word);
143
+ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
144
+ Query *get_wild_q(QParser *qp, char *field, char *pattern);
145
+
146
+ HashSet *first_field(QParser *qp, char *field);
147
+ HashSet *add_field(QParser *qp, char *field);
148
+
149
+ Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
150
+
151
+ Phrase *ph_first_word(char *word);
152
+ Phrase *ph_add_word(Phrase *self, char *word);
153
+ Phrase *ph_add_multi_word(Phrase *self, char *word);
154
+
155
+ Query *get_range_q(char *field, char *from, char *to,
156
+ bool inc_lower, bool inc_upper);
157
+
158
+ #define FLDS(q, func) do {\
159
+ char *field;\
160
+ if (qp->fields->size == 0) {\
161
+ q = NULL;\
162
+ } else if (qp->fields->size == 1) {\
163
+ field = (char *)qp->fields->elems[0];\
164
+ q = func;\
165
+ } else {\
166
+ int i;Query *sq;\
167
+ q = bq_create(false);\
168
+ for (i = 0; i < qp->fields->size; i++) {\
169
+ field = (char *)qp->fields->elems[i];\
170
+ sq = func;\
171
+ if (sq) bq_add_query(q, sq, BC_SHOULD);\
172
+ }\
173
+ }\
174
+ } while (0)
175
+
176
+
177
+ /* Line 213 of yacc.c. */
178
+ #line 179 "y.tab.c"
179
+
180
+ #if ! defined (yyoverflow) || YYERROR_VERBOSE
181
+
182
+ # ifndef YYFREE
183
+ # define YYFREE free
184
+ # endif
185
+ # ifndef YYMALLOC
186
+ # define YYMALLOC malloc
187
+ # endif
188
+
189
+ /* The parser invokes alloca or malloc; define the necessary symbols. */
190
+
191
+ # ifdef YYSTACK_USE_ALLOCA
192
+ # if YYSTACK_USE_ALLOCA
193
+ # ifdef __GNUC__
194
+ # define YYSTACK_ALLOC __builtin_alloca
195
+ # else
196
+ # define YYSTACK_ALLOC alloca
197
+ # endif
198
+ # endif
199
+ # endif
200
+
201
+ # ifdef YYSTACK_ALLOC
202
+ /* Pacify GCC's `empty if-body' warning. */
203
+ # define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
204
+ # else
205
+ # if defined (__STDC__) || defined (__cplusplus)
206
+ # include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
207
+ # define YYSIZE_T size_t
208
+ # endif
209
+ # define YYSTACK_ALLOC YYMALLOC
210
+ # define YYSTACK_FREE YYFREE
211
+ # endif
212
+ #endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */
213
+
214
+
215
+ #if (! defined (yyoverflow) \
216
+ && (! defined (__cplusplus) \
217
+ || (defined (YYSTYPE_IS_TRIVIAL) && YYSTYPE_IS_TRIVIAL)))
218
+
219
+ /* A type that is properly aligned for any stack member. */
220
+ union yyalloc
221
+ {
222
+ short int yyss;
223
+ YYSTYPE yyvs;
224
+ };
225
+
226
+ /* The size of the maximum gap between one aligned stack and the next. */
227
+ # define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
228
+
229
+ /* The size of an array large to enough to hold all stacks, each with
230
+ N elements. */
231
+ # define YYSTACK_BYTES(N) \
232
+ ((N) * (sizeof (short int) + sizeof (YYSTYPE)) \
233
+ + YYSTACK_GAP_MAXIMUM)
234
+
235
+ /* Copy COUNT objects from FROM to TO. The source and destination do
236
+ not overlap. */
237
+ # ifndef YYCOPY
238
+ # if defined (__GNUC__) && 1 < __GNUC__
239
+ # define YYCOPY(To, From, Count) \
240
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
241
+ # else
242
+ # define YYCOPY(To, From, Count) \
243
+ do \
244
+ { \
245
+ register YYSIZE_T yyi; \
246
+ for (yyi = 0; yyi < (Count); yyi++) \
247
+ (To)[yyi] = (From)[yyi]; \
248
+ } \
249
+ while (0)
250
+ # endif
251
+ # endif
252
+
253
+ /* Relocate STACK from its old location to the new one. The
254
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
255
+ elements in the stack, and YYPTR gives the new location of the
256
+ stack. Advance YYPTR to a properly aligned location for the next
257
+ stack. */
258
+ # define YYSTACK_RELOCATE(Stack) \
259
+ do \
260
+ { \
261
+ YYSIZE_T yynewbytes; \
262
+ YYCOPY (&yyptr->Stack, Stack, yysize); \
263
+ Stack = &yyptr->Stack; \
264
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
265
+ yyptr += yynewbytes / sizeof (*yyptr); \
266
+ } \
267
+ while (0)
268
+
269
+ #endif
270
+
271
+ #if defined (__STDC__) || defined (__cplusplus)
272
+ typedef signed char yysigned_char;
273
+ #else
274
+ typedef short int yysigned_char;
275
+ #endif
276
+
277
+ /* YYFINAL -- State number of the termination state. */
278
+ #define YYFINAL 38
279
+ /* YYLAST -- Last index in YYTABLE. */
280
+ #define YYLAST 98
281
+
282
+ /* YYNTOKENS -- Number of terminals. */
283
+ #define YYNTOKENS 26
284
+ /* YYNNTS -- Number of nonterminals. */
285
+ #define YYNNTS 16
286
+ /* YYNRULES -- Number of rules. */
287
+ #define YYNRULES 50
288
+ /* YYNSTATES -- Number of states. */
289
+ #define YYNSTATES 79
290
+
291
+ /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
292
+ #define YYUNDEFTOK 2
293
+ #define YYMAXUTOK 265
294
+
295
+ #define YYTRANSLATE(YYX) \
296
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
297
+
298
+ /* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
299
+ static const unsigned char yytranslate[] =
300
+ {
301
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
302
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
303
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
304
+ 2, 2, 2, 2, 18, 2, 2, 2, 2, 2,
305
+ 13, 14, 16, 2, 2, 2, 2, 2, 2, 2,
306
+ 2, 2, 2, 2, 2, 2, 2, 2, 10, 2,
307
+ 19, 25, 20, 2, 2, 2, 2, 2, 2, 2,
308
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
309
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
310
+ 2, 21, 2, 22, 12, 2, 2, 2, 2, 2,
311
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
312
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
313
+ 2, 2, 2, 24, 17, 23, 15, 2, 2, 2,
314
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
315
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
316
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
317
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
318
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
319
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
320
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
321
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
322
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
323
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
324
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
325
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
326
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
327
+ 5, 6, 7, 8, 9, 11
328
+ };
329
+
330
+ #if YYDEBUG
331
+ /* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
332
+ YYRHS. */
333
+ static const unsigned char yyprhs[] =
334
+ {
335
+ 0, 0, 3, 4, 6, 8, 12, 16, 19, 22,
336
+ 25, 27, 29, 33, 35, 39, 41, 43, 45, 47,
337
+ 49, 53, 56, 58, 59, 64, 65, 66, 72, 74,
338
+ 78, 82, 88, 91, 96, 98, 101, 104, 108, 112,
339
+ 117, 122, 127, 132, 136, 140, 144, 148, 151, 155,
340
+ 159
341
+ };
342
+
343
+ /* YYRHS -- A `-1'-separated list of the rules' RHS. */
344
+ static const yysigned_char yyrhs[] =
345
+ {
346
+ 27, 0, -1, -1, 28, -1, 29, -1, 28, 7,
347
+ 29, -1, 28, 6, 29, -1, 28, 29, -1, 9,
348
+ 30, -1, 8, 30, -1, 30, -1, 31, -1, 31,
349
+ 12, 3, -1, 32, -1, 13, 28, 14, -1, 34,
350
+ -1, 39, -1, 41, -1, 33, -1, 3, -1, 3,
351
+ 15, 3, -1, 3, 15, -1, 4, -1, -1, 38,
352
+ 10, 31, 35, -1, -1, -1, 16, 36, 10, 31,
353
+ 37, -1, 3, -1, 38, 17, 3, -1, 18, 40,
354
+ 18, -1, 18, 40, 18, 15, 3, -1, 18, 18,
355
+ -1, 18, 18, 15, 3, -1, 3, -1, 19, 20,
356
+ -1, 40, 3, -1, 40, 19, 20, -1, 40, 17,
357
+ 3, -1, 21, 3, 3, 22, -1, 21, 3, 3,
358
+ 23, -1, 24, 3, 3, 22, -1, 24, 3, 3,
359
+ 23, -1, 19, 3, 23, -1, 19, 3, 22, -1,
360
+ 21, 3, 20, -1, 24, 3, 20, -1, 19, 3,
361
+ -1, 19, 25, 3, -1, 20, 25, 3, -1, 20,
362
+ 3, -1
363
+ };
364
+
365
+ /* YYRLINE[YYN] -- source line where rule number YYN was defined. */
366
+ static const unsigned char yyrline[] =
367
+ {
368
+ 0, 86, 86, 87, 89, 90, 91, 92, 94, 95,
369
+ 96, 98, 99, 101, 102, 103, 104, 105, 106, 108,
370
+ 109, 110, 112, 114, 114, 116, 116, 116, 119, 120,
371
+ 122, 123, 124, 125, 127, 128, 129, 130, 131, 133,
372
+ 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
373
+ 144
374
+ };
375
+ #endif
376
+
377
+ #if YYDEBUG || YYERROR_VERBOSE
378
+ /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
379
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
380
+ static const char *const yytname[] =
381
+ {
382
+ "$end", "error", "$undefined", "WORD", "WILD_STR", "LOW", "OR", "AND",
383
+ "NOT", "REQ", "':'", "HIGH", "'^'", "'('", "')'", "'~'", "'*'", "'|'",
384
+ "'\"'", "'<'", "'>'", "'['", "']'", "'}'", "'{'", "'='", "$accept",
385
+ "bool_q", "bool_clss", "bool_cls", "boosted_q", "q", "term_q", "wild_q",
386
+ "field_q", "@1", "@2", "@3", "field", "phrase_q", "ph_words", "range_q", 0
387
+ };
388
+ #endif
389
+
390
+ # ifdef YYPRINT
391
+ /* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
392
+ token YYLEX-NUM. */
393
+ static const unsigned short int yytoknum[] =
394
+ {
395
+ 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
396
+ 58, 265, 94, 40, 41, 126, 42, 124, 34, 60,
397
+ 62, 91, 93, 125, 123, 61
398
+ };
399
+ # endif
400
+
401
+ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
402
+ static const unsigned char yyr1[] =
403
+ {
404
+ 0, 26, 27, 27, 28, 28, 28, 28, 29, 29,
405
+ 29, 30, 30, 31, 31, 31, 31, 31, 31, 32,
406
+ 32, 32, 33, 35, 34, 36, 37, 34, 38, 38,
407
+ 39, 39, 39, 39, 40, 40, 40, 40, 40, 41,
408
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
409
+ 41
410
+ };
411
+
412
+ /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
413
+ static const unsigned char yyr2[] =
414
+ {
415
+ 0, 2, 0, 1, 1, 3, 3, 2, 2, 2,
416
+ 1, 1, 3, 1, 3, 1, 1, 1, 1, 1,
417
+ 3, 2, 1, 0, 4, 0, 0, 5, 1, 3,
418
+ 3, 5, 2, 4, 1, 2, 2, 3, 3, 4,
419
+ 4, 4, 4, 3, 3, 3, 3, 2, 3, 3,
420
+ 2
421
+ };
422
+
423
+ /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
424
+ STATE-NUM when YYTABLE doesn't specify something else to do. Zero
425
+ means the default is an error. */
426
+ static const unsigned char yydefact[] =
427
+ {
428
+ 2, 19, 22, 0, 0, 0, 25, 0, 0, 0,
429
+ 0, 0, 0, 3, 4, 10, 11, 13, 18, 15,
430
+ 0, 16, 17, 21, 9, 8, 0, 0, 34, 32,
431
+ 0, 0, 47, 0, 50, 0, 0, 0, 1, 0,
432
+ 0, 7, 0, 0, 0, 20, 14, 0, 0, 35,
433
+ 36, 0, 30, 0, 44, 43, 48, 49, 0, 45,
434
+ 0, 46, 6, 5, 12, 23, 29, 26, 33, 38,
435
+ 0, 37, 39, 40, 41, 42, 24, 27, 31
436
+ };
437
+
438
+ /* YYDEFGOTO[NTERM-NUM]. */
439
+ static const yysigned_char yydefgoto[] =
440
+ {
441
+ -1, 12, 13, 14, 15, 16, 17, 18, 19, 76,
442
+ 27, 77, 20, 21, 31, 22
443
+ };
444
+
445
+ /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
446
+ STATE-NUM. */
447
+ #define YYPACT_NINF -32
448
+ static const yysigned_char yypact[] =
449
+ {
450
+ 44, 75, -32, 63, 63, 44, -32, 55, -2, -1,
451
+ 0, 3, 11, 25, -32, -32, 18, -32, -32, -32,
452
+ 76, -32, -32, 33, -32, -32, 1, 32, -32, 41,
453
+ 39, 52, 17, 58, -32, 69, 15, 34, -32, 44,
454
+ 44, -32, 72, 63, 77, -32, -32, 63, 88, -32,
455
+ -32, 91, 80, 78, -32, -32, -32, -32, 28, -32,
456
+ 66, -32, -32, -32, -32, -32, -32, -32, -32, -32,
457
+ 93, -32, -32, -32, -32, -32, -32, -32, -32
458
+ };
459
+
460
+ /* YYPGOTO[NTERM-NUM]. */
461
+ static const yysigned_char yypgoto[] =
462
+ {
463
+ -32, -32, 92, -13, 74, -31, -32, -32, -32, -32,
464
+ -32, -32, -32, -32, -32, -32
465
+ };
466
+
467
+ /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
468
+ positive, shift that token. If negative, reduce the rule which
469
+ number is the opposite. If zero, do what YYDEFACT says.
470
+ If YYTABLE_NINF, syntax error. */
471
+ #define YYTABLE_NINF -29
472
+ static const yysigned_char yytable[] =
473
+ {
474
+ 41, 32, 34, 36, 1, 2, 37, 39, 40, 3,
475
+ 4, 38, 65, 41, 5, 46, 67, 6, 58, 7,
476
+ 8, 9, 10, 33, 35, 11, 62, 63, 1, 2,
477
+ 42, 39, 40, 3, 4, 59, 45, 60, 5, 54,
478
+ 55, 6, 47, 7, 8, 9, 10, 1, 2, 11,
479
+ 72, 73, 3, 4, 61, 50, 48, 5, 28, 49,
480
+ 6, 56, 7, 8, 9, 10, 1, 2, 11, 51,
481
+ 52, 53, 57, 29, 30, 64, 5, 24, 25, 6,
482
+ 66, 7, 8, 9, 10, -28, 43, 11, 74, 75,
483
+ 23, 68, -28, 44, 69, 70, 78, 26, 71
484
+ };
485
+
486
+ static const unsigned char yycheck[] =
487
+ {
488
+ 13, 3, 3, 3, 3, 4, 3, 6, 7, 8,
489
+ 9, 0, 43, 26, 13, 14, 47, 16, 3, 18,
490
+ 19, 20, 21, 25, 25, 24, 39, 40, 3, 4,
491
+ 12, 6, 7, 8, 9, 20, 3, 3, 13, 22,
492
+ 23, 16, 10, 18, 19, 20, 21, 3, 4, 24,
493
+ 22, 23, 8, 9, 20, 3, 15, 13, 3, 20,
494
+ 16, 3, 18, 19, 20, 21, 3, 4, 24, 17,
495
+ 18, 19, 3, 18, 19, 3, 13, 3, 4, 16,
496
+ 3, 18, 19, 20, 21, 10, 10, 24, 22, 23,
497
+ 15, 3, 17, 17, 3, 15, 3, 5, 20
498
+ };
499
+
500
+ /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
501
+ symbol of state STATE-NUM. */
502
+ static const unsigned char yystos[] =
503
+ {
504
+ 0, 3, 4, 8, 9, 13, 16, 18, 19, 20,
505
+ 21, 24, 27, 28, 29, 30, 31, 32, 33, 34,
506
+ 38, 39, 41, 15, 30, 30, 28, 36, 3, 18,
507
+ 19, 40, 3, 25, 3, 25, 3, 3, 0, 6,
508
+ 7, 29, 12, 10, 17, 3, 14, 10, 15, 20,
509
+ 3, 17, 18, 19, 22, 23, 3, 3, 3, 20,
510
+ 3, 20, 29, 29, 3, 31, 3, 31, 3, 3,
511
+ 15, 20, 22, 23, 22, 23, 35, 37, 3
512
+ };
513
+
514
+ #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
515
+ # define YYSIZE_T __SIZE_TYPE__
516
+ #endif
517
+ #if ! defined (YYSIZE_T) && defined (size_t)
518
+ # define YYSIZE_T size_t
519
+ #endif
520
+ #if ! defined (YYSIZE_T)
521
+ # if defined (__STDC__) || defined (__cplusplus)
522
+ # include <stddef.h> /* INFRINGES ON USER NAME SPACE */
523
+ # define YYSIZE_T size_t
524
+ # endif
525
+ #endif
526
+ #if ! defined (YYSIZE_T)
527
+ # define YYSIZE_T unsigned int
528
+ #endif
529
+
530
+ #define yyerrok (yyerrstatus = 0)
531
+ #define yyclearin (yychar = YYEMPTY)
532
+ #define YYEMPTY (-2)
533
+ #define YYEOF 0
534
+
535
+ #define YYACCEPT goto yyacceptlab
536
+ #define YYABORT goto yyabortlab
537
+ #define YYERROR goto yyerrorlab
538
+
539
+
540
+ /* Like YYERROR except do call yyerror. This remains here temporarily
541
+ to ease the transition to the new meaning of YYERROR, for GCC.
542
+ Once GCC version 2 has supplanted version 1, this can go. */
543
+
544
+ #define YYFAIL goto yyerrlab
545
+
546
+ #define YYRECOVERING() (!!yyerrstatus)
547
+
548
+ #define YYBACKUP(Token, Value) \
549
+ do \
550
+ if (yychar == YYEMPTY && yylen == 1) \
551
+ { \
552
+ yychar = (Token); \
553
+ yylval = (Value); \
554
+ yytoken = YYTRANSLATE (yychar); \
555
+ YYPOPSTACK; \
556
+ goto yybackup; \
557
+ } \
558
+ else \
559
+ { \
560
+ yyerror (qp, "syntax error: cannot back up");\
561
+ YYERROR; \
562
+ } \
563
+ while (0)
564
+
565
+
566
+ #define YYTERROR 1
567
+ #define YYERRCODE 256
568
+
569
+
570
+ /* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
571
+ If N is 0, then set CURRENT to the empty location which ends
572
+ the previous symbol: RHS[0] (always defined). */
573
+
574
+ #define YYRHSLOC(Rhs, K) ((Rhs)[K])
575
+ #ifndef YYLLOC_DEFAULT
576
+ # define YYLLOC_DEFAULT(Current, Rhs, N) \
577
+ do \
578
+ if (N) \
579
+ { \
580
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
581
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
582
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
583
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
584
+ } \
585
+ else \
586
+ { \
587
+ (Current).first_line = (Current).last_line = \
588
+ YYRHSLOC (Rhs, 0).last_line; \
589
+ (Current).first_column = (Current).last_column = \
590
+ YYRHSLOC (Rhs, 0).last_column; \
591
+ } \
592
+ while (0)
593
+ #endif
594
+
595
+
596
+ /* YY_LOCATION_PRINT -- Print the location on the stream.
597
+ This macro was not mandated originally: define only if we know
598
+ we won't break user code: when these are the locations we know. */
599
+
600
+ #ifndef YY_LOCATION_PRINT
601
+ # if YYLTYPE_IS_TRIVIAL
602
+ # define YY_LOCATION_PRINT(File, Loc) \
603
+ fprintf (File, "%d.%d-%d.%d", \
604
+ (Loc).first_line, (Loc).first_column, \
605
+ (Loc).last_line, (Loc).last_column)
606
+ # else
607
+ # define YY_LOCATION_PRINT(File, Loc) ((void) 0)
608
+ # endif
609
+ #endif
610
+
611
+
612
+ /* YYLEX -- calling `yylex' with the right arguments. */
613
+
614
+ #ifdef YYLEX_PARAM
615
+ # define YYLEX yylex (&yylval, YYLEX_PARAM)
616
+ #else
617
+ # define YYLEX yylex (&yylval, qp)
618
+ #endif
619
+
620
+ /* Enable debugging if requested. */
621
+ #if YYDEBUG
622
+
623
+ # ifndef YYFPRINTF
624
+ # include <stdio.h> /* INFRINGES ON USER NAME SPACE */
625
+ # define YYFPRINTF fprintf
626
+ # endif
627
+
628
+ # define YYDPRINTF(Args) \
629
+ do { \
630
+ if (yydebug) \
631
+ YYFPRINTF Args; \
632
+ } while (0)
633
+
634
+ # define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
635
+ do { \
636
+ if (yydebug) \
637
+ { \
638
+ YYFPRINTF (stderr, "%s ", Title); \
639
+ yysymprint (stderr, \
640
+ Type, Value); \
641
+ YYFPRINTF (stderr, "\n"); \
642
+ } \
643
+ } while (0)
644
+
645
+ /*------------------------------------------------------------------.
646
+ | yy_stack_print -- Print the state stack from its BOTTOM up to its |
647
+ | TOP (included). |
648
+ `------------------------------------------------------------------*/
649
+
650
#if defined (__STDC__) || defined (__cplusplus)
static void
yy_stack_print (short int *bottom, short int *top)
#else
static void
yy_stack_print (bottom, top)
    short int *bottom;
    short int *top;
#endif
{
  /* Dump every state number from BOTTOM through TOP (both inclusive)
     on a single line of stderr.  */
  short int *yycursor = bottom;

  YYFPRINTF (stderr, "Stack now");
  while (yycursor <= top)
    {
      YYFPRINTF (stderr, " %d", *yycursor);
      yycursor++;
    }
  YYFPRINTF (stderr, "\n");
}
665
+
666
+ # define YY_STACK_PRINT(Bottom, Top) \
667
+ do { \
668
+ if (yydebug) \
669
+ yy_stack_print ((Bottom), (Top)); \
670
+ } while (0)
671
+
672
+
673
+ /*------------------------------------------------.
674
+ | Report that the YYRULE is going to be reduced. |
675
+ `------------------------------------------------*/
676
+
677
+ #if defined (__STDC__) || defined (__cplusplus)
678
+ static void
679
+ yy_reduce_print (int yyrule)
680
+ #else
681
+ static void
682
+ yy_reduce_print (yyrule)
683
+ int yyrule;
684
+ #endif
685
+ {
686
+ int yyi;
687
+ unsigned int yylno = yyrline[yyrule];
688
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ",
689
+ yyrule - 1, yylno);
690
+ /* Print the symbols being reduced, and their result. */
691
+ for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++)
692
+ YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]);
693
+ YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]);
694
+ }
695
+
696
+ # define YY_REDUCE_PRINT(Rule) \
697
+ do { \
698
+ if (yydebug) \
699
+ yy_reduce_print (Rule); \
700
+ } while (0)
701
+
702
+ /* Nonzero means print parse trace. It is left uninitialized so that
703
+ multiple parsers can coexist. */
704
+ int yydebug;
705
+ #else /* !YYDEBUG */
706
+ # define YYDPRINTF(Args)
707
+ # define YY_SYMBOL_PRINT(Title, Type, Value, Location)
708
+ # define YY_STACK_PRINT(Bottom, Top)
709
+ # define YY_REDUCE_PRINT(Rule)
710
+ #endif /* !YYDEBUG */
711
+
712
+
713
+ /* YYINITDEPTH -- initial size of the parser's stacks. */
714
+ #ifndef YYINITDEPTH
715
+ # define YYINITDEPTH 200
716
+ #endif
717
+
718
+ /* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
719
+ if the built-in stack extension method is used).
720
+
721
+ Do not make this value too large; the results are undefined if
722
+ SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
723
+ evaluated with infinite-precision integer arithmetic. */
724
+
725
+ #ifndef YYMAXDEPTH
726
+ # define YYMAXDEPTH 10000
727
+ #endif
728
+
729
+
730
+
731
+ #if YYERROR_VERBOSE
732
+
733
+ # ifndef yystrlen
734
+ # if defined (__GLIBC__) && defined (_STRING_H)
735
+ # define yystrlen strlen
736
+ # else
737
+ /* Return the length of YYSTR. */
738
+ static YYSIZE_T
739
+ # if defined (__STDC__) || defined (__cplusplus)
740
+ yystrlen (const char *yystr)
741
+ # else
742
+ yystrlen (yystr)
743
+ const char *yystr;
744
+ # endif
745
+ {
746
+ register const char *yys = yystr;
747
+
748
+ while (*yys++ != '\0')
749
+ continue;
750
+
751
+ return yys - yystr - 1;
752
+ }
753
+ # endif
754
+ # endif
755
+
756
+ # ifndef yystpcpy
757
+ # if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
758
+ # define yystpcpy stpcpy
759
+ # else
760
/* Copy the string YYSRC, terminator included, into YYDEST and return
   a pointer to the '\0' just written in YYDEST.  */
static char *
#   if defined (__STDC__) || defined (__cplusplus)
yystpcpy (char *yydest, const char *yysrc)
#   else
yystpcpy (yydest, yysrc)
     char *yydest;
     const char *yysrc;
#   endif
{
  /* Each iteration copies one character; the loop stops right after
     the terminator has been stored, leaving YYDEST pointing at it.  */
  for (; (*yydest = *yysrc) != '\0'; ++yydest, ++yysrc)
    continue;

  return yydest;
}
779
+ # endif
780
+ # endif
781
+
782
+ #endif /* !YYERROR_VERBOSE */
783
+
784
+
785
+
786
+ #if YYDEBUG
787
+ /*--------------------------------.
788
+ | Print this symbol on YYOUTPUT. |
789
+ `--------------------------------*/
790
+
791
+ #if defined (__STDC__) || defined (__cplusplus)
792
+ static void
793
+ yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep)
794
+ #else
795
+ static void
796
+ yysymprint (yyoutput, yytype, yyvaluep)
797
+ FILE *yyoutput;
798
+ int yytype;
799
+ YYSTYPE *yyvaluep;
800
+ #endif
801
+ {
802
+ /* Pacify ``unused variable'' warnings. */
803
+ (void) yyvaluep;
804
+
805
+ if (yytype < YYNTOKENS)
806
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
807
+ else
808
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
809
+
810
+
811
+ # ifdef YYPRINT
812
+ if (yytype < YYNTOKENS)
813
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
814
+ # endif
815
+ switch (yytype)
816
+ {
817
+ default:
818
+ break;
819
+ }
820
+ YYFPRINTF (yyoutput, ")");
821
+ }
822
+
823
+ #endif /* ! YYDEBUG */
824
+ /*-----------------------------------------------.
825
+ | Release the memory associated to this symbol. |
826
+ `-----------------------------------------------*/
827
+
828
+ #if defined (__STDC__) || defined (__cplusplus)
829
+ static void
830
+ yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
831
+ #else
832
+ static void
833
+ yydestruct (yymsg, yytype, yyvaluep)
834
+ const char *yymsg;
835
+ int yytype;
836
+ YYSTYPE *yyvaluep;
837
+ #endif
838
+ {
839
+ /* Pacify ``unused variable'' warnings. */
840
+ (void) yyvaluep;
841
+
842
+ if (!yymsg)
843
+ yymsg = "Deleting";
844
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
845
+
846
+ switch (yytype)
847
+ {
848
+
849
+ default:
850
+ break;
851
+ }
852
+ }
853
+
854
+
855
+ /* Prevent warnings from -Wmissing-prototypes. */
856
+
857
+ #ifdef YYPARSE_PARAM
858
+ # if defined (__STDC__) || defined (__cplusplus)
859
+ int yyparse (void *YYPARSE_PARAM);
860
+ # else
861
+ int yyparse ();
862
+ # endif
863
+ #else /* ! YYPARSE_PARAM */
864
+ #if defined (__STDC__) || defined (__cplusplus)
865
+ int yyparse (QParser *qp);
866
+ #else
867
+ int yyparse ();
868
+ #endif
869
+ #endif /* ! YYPARSE_PARAM */
870
+
871
+
872
+
873
+
874
+
875
+
876
+ /*----------.
877
+ | yyparse. |
878
+ `----------*/
879
+
880
+ #ifdef YYPARSE_PARAM
881
+ # if defined (__STDC__) || defined (__cplusplus)
882
+ int yyparse (void *YYPARSE_PARAM)
883
+ # else
884
+ int yyparse (YYPARSE_PARAM)
885
+ void *YYPARSE_PARAM;
886
+ # endif
887
+ #else /* ! YYPARSE_PARAM */
888
+ #if defined (__STDC__) || defined (__cplusplus)
889
+ int
890
+ yyparse (QParser *qp)
891
+ #else
892
+ int
893
+ yyparse (qp)
894
+ QParser *qp;
895
+ #endif
896
+ #endif
897
+ {
898
+ /* The look-ahead symbol. */
899
+ int yychar;
900
+
901
+ /* The semantic value of the look-ahead symbol. */
902
+ YYSTYPE yylval;
903
+
904
+ /* Number of syntax errors so far. */
905
+ int yynerrs;
906
+
907
+ register int yystate;
908
+ register int yyn;
909
+ int yyresult;
910
+ /* Number of tokens to shift before error messages enabled. */
911
+ int yyerrstatus;
912
+ /* Look-ahead token as an internal (translated) token number. */
913
+ int yytoken = 0;
914
+
915
+ /* Three stacks and their tools:
916
+ `yyss': related to states,
917
+ `yyvs': related to semantic values,
918
+ `yyls': related to locations.
919
+
920
+ Refer to the stacks thru separate pointers, to allow yyoverflow
921
+ to reallocate them elsewhere. */
922
+
923
+ /* The state stack. */
924
+ short int yyssa[YYINITDEPTH];
925
+ short int *yyss = yyssa;
926
+ register short int *yyssp;
927
+
928
+ /* The semantic value stack. */
929
+ YYSTYPE yyvsa[YYINITDEPTH];
930
+ YYSTYPE *yyvs = yyvsa;
931
+ register YYSTYPE *yyvsp;
932
+
933
+
934
+
935
+ #define YYPOPSTACK (yyvsp--, yyssp--)
936
+
937
+ YYSIZE_T yystacksize = YYINITDEPTH;
938
+
939
+ /* The variables used to return semantic value and location from the
940
+ action routines. */
941
+ YYSTYPE yyval;
942
+
943
+
944
+ /* When reducing, the number of symbols on the RHS of the reduced
945
+ rule. */
946
+ int yylen;
947
+
948
+ YYDPRINTF ((stderr, "Starting parse\n"));
949
+
950
+ yystate = 0;
951
+ yyerrstatus = 0;
952
+ yynerrs = 0;
953
+ yychar = YYEMPTY; /* Cause a token to be read. */
954
+
955
+ /* Initialize stack pointers.
956
+ Waste one element of value and location stack
957
+ so that they stay on the same level as the state stack.
958
+ The wasted elements are never initialized. */
959
+
960
+ yyssp = yyss;
961
+ yyvsp = yyvs;
962
+
963
+
964
+ yyvsp[0] = yylval;
965
+
966
+ goto yysetstate;
967
+
968
+ /*------------------------------------------------------------.
969
+ | yynewstate -- Push a new state, which is found in yystate. |
970
+ `------------------------------------------------------------*/
971
+ yynewstate:
972
+ /* In all cases, when you get here, the value and location stacks
973
+ have just been pushed. so pushing a state here evens the stacks.
974
+ */
975
+ yyssp++;
976
+
977
+ yysetstate:
978
+ *yyssp = yystate;
979
+
980
+ if (yyss + yystacksize - 1 <= yyssp)
981
+ {
982
+ /* Get the current used size of the three stacks, in elements. */
983
+ YYSIZE_T yysize = yyssp - yyss + 1;
984
+
985
+ #ifdef yyoverflow
986
+ {
987
+ /* Give user a chance to reallocate the stack. Use copies of
988
+ these so that the &'s don't force the real ones into
989
+ memory. */
990
+ YYSTYPE *yyvs1 = yyvs;
991
+ short int *yyss1 = yyss;
992
+
993
+
994
+ /* Each stack pointer address is followed by the size of the
995
+ data in use in that stack, in bytes. This used to be a
996
+ conditional around just the two extra args, but that might
997
+ be undefined if yyoverflow is a macro. */
998
+ yyoverflow ("parser stack overflow",
999
+ &yyss1, yysize * sizeof (*yyssp),
1000
+ &yyvs1, yysize * sizeof (*yyvsp),
1001
+
1002
+ &yystacksize);
1003
+
1004
+ yyss = yyss1;
1005
+ yyvs = yyvs1;
1006
+ }
1007
+ #else /* no yyoverflow */
1008
+ # ifndef YYSTACK_RELOCATE
1009
+ goto yyoverflowlab;
1010
+ # else
1011
+ /* Extend the stack our own way. */
1012
+ if (YYMAXDEPTH <= yystacksize)
1013
+ goto yyoverflowlab;
1014
+ yystacksize *= 2;
1015
+ if (YYMAXDEPTH < yystacksize)
1016
+ yystacksize = YYMAXDEPTH;
1017
+
1018
+ {
1019
+ short int *yyss1 = yyss;
1020
+ union yyalloc *yyptr =
1021
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
1022
+ if (! yyptr)
1023
+ goto yyoverflowlab;
1024
+ YYSTACK_RELOCATE (yyss);
1025
+ YYSTACK_RELOCATE (yyvs);
1026
+
1027
+ # undef YYSTACK_RELOCATE
1028
+ if (yyss1 != yyssa)
1029
+ YYSTACK_FREE (yyss1);
1030
+ }
1031
+ # endif
1032
+ #endif /* no yyoverflow */
1033
+
1034
+ yyssp = yyss + yysize - 1;
1035
+ yyvsp = yyvs + yysize - 1;
1036
+
1037
+
1038
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
1039
+ (unsigned long int) yystacksize));
1040
+
1041
+ if (yyss + yystacksize - 1 <= yyssp)
1042
+ YYABORT;
1043
+ }
1044
+
1045
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
1046
+
1047
+ goto yybackup;
1048
+
1049
+ /*-----------.
1050
+ | yybackup. |
1051
+ `-----------*/
1052
+ yybackup:
1053
+
1054
+ /* Do appropriate processing given the current state. */
1055
+ /* Read a look-ahead token if we need one and don't already have one. */
1056
+ /* yyresume: */
1057
+
1058
+ /* First try to decide what to do without reference to look-ahead token. */
1059
+
1060
+ yyn = yypact[yystate];
1061
+ if (yyn == YYPACT_NINF)
1062
+ goto yydefault;
1063
+
1064
+ /* Not known => get a look-ahead token if don't already have one. */
1065
+
1066
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
1067
+ if (yychar == YYEMPTY)
1068
+ {
1069
+ YYDPRINTF ((stderr, "Reading a token: "));
1070
+ yychar = YYLEX;
1071
+ }
1072
+
1073
+ if (yychar <= YYEOF)
1074
+ {
1075
+ yychar = yytoken = YYEOF;
1076
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
1077
+ }
1078
+ else
1079
+ {
1080
+ yytoken = YYTRANSLATE (yychar);
1081
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
1082
+ }
1083
+
1084
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
1085
+ detect an error, take that action. */
1086
+ yyn += yytoken;
1087
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
1088
+ goto yydefault;
1089
+ yyn = yytable[yyn];
1090
+ if (yyn <= 0)
1091
+ {
1092
+ if (yyn == 0 || yyn == YYTABLE_NINF)
1093
+ goto yyerrlab;
1094
+ yyn = -yyn;
1095
+ goto yyreduce;
1096
+ }
1097
+
1098
+ if (yyn == YYFINAL)
1099
+ YYACCEPT;
1100
+
1101
+ /* Shift the look-ahead token. */
1102
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
1103
+
1104
+ /* Discard the token being shifted unless it is eof. */
1105
+ if (yychar != YYEOF)
1106
+ yychar = YYEMPTY;
1107
+
1108
+ *++yyvsp = yylval;
1109
+
1110
+
1111
+ /* Count tokens shifted since error; after three, turn off error
1112
+ status. */
1113
+ if (yyerrstatus)
1114
+ yyerrstatus--;
1115
+
1116
+ yystate = yyn;
1117
+ goto yynewstate;
1118
+
1119
+
1120
+ /*-----------------------------------------------------------.
1121
+ | yydefault -- do the default action for the current state. |
1122
+ `-----------------------------------------------------------*/
1123
+ yydefault:
1124
+ yyn = yydefact[yystate];
1125
+ if (yyn == 0)
1126
+ goto yyerrlab;
1127
+ goto yyreduce;
1128
+
1129
+
1130
+ /*-----------------------------.
1131
+ | yyreduce -- Do a reduction. |
1132
+ `-----------------------------*/
1133
+ yyreduce:
1134
+ /* yyn is the number of a rule to reduce with. */
1135
+ yylen = yyr2[yyn];
1136
+
1137
+ /* If YYLEN is nonzero, implement the default value of the action:
1138
+ `$$ = $1'.
1139
+
1140
+ Otherwise, the following line sets YYVAL to garbage.
1141
+ This behavior is undocumented and Bison
1142
+ users should not rely upon it. Assigning to YYVAL
1143
+ unconditionally makes the parser a bit smaller, and it avoids a
1144
+ GCC warning that YYVAL may be used uninitialized. */
1145
+ yyval = yyvsp[1-yylen];
1146
+
1147
+
1148
+ YY_REDUCE_PRINT (yyn);
1149
+ switch (yyn)
1150
+ {
1151
+ case 2:
1152
+ #line 86 "src/query_parser/q_parser.y"
1153
+ { qp->result = (yyval.query) = NULL; }
1154
+ break;
1155
+
1156
+ case 3:
1157
+ #line 87 "src/query_parser/q_parser.y"
1158
+ { qp->result = (yyval.query) = get_bool_q((yyvsp[0].array)); }
1159
+ break;
1160
+
1161
+ case 4:
1162
+ #line 89 "src/query_parser/q_parser.y"
1163
+ { (yyval.array) = first_cls((yyvsp[0].bcls)); }
1164
+ break;
1165
+
1166
+ case 5:
1167
+ #line 90 "src/query_parser/q_parser.y"
1168
+ { (yyval.array) = add_and_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1169
+ break;
1170
+
1171
+ case 6:
1172
+ #line 91 "src/query_parser/q_parser.y"
1173
+ { (yyval.array) = add_or_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1174
+ break;
1175
+
1176
+ case 7:
1177
+ #line 92 "src/query_parser/q_parser.y"
1178
+ { (yyval.array) = add_default_cls(qp, (yyvsp[-1].array), (yyvsp[0].bcls)); }
1179
+ break;
1180
+
1181
+ case 8:
1182
+ #line 94 "src/query_parser/q_parser.y"
1183
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
1184
+ break;
1185
+
1186
+ case 9:
1187
+ #line 95 "src/query_parser/q_parser.y"
1188
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
1189
+ break;
1190
+
1191
+ case 10:
1192
+ #line 96 "src/query_parser/q_parser.y"
1193
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
1194
+ break;
1195
+
1196
+ case 12:
1197
+ #line 99 "src/query_parser/q_parser.y"
1198
+ { if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
1199
+ break;
1200
+
1201
+ case 14:
1202
+ #line 102 "src/query_parser/q_parser.y"
1203
+ { (yyval.query) = get_bool_q((yyvsp[-1].array)); }
1204
+ break;
1205
+
1206
+ case 19:
1207
+ #line 108 "src/query_parser/q_parser.y"
1208
+ { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
1209
+ break;
1210
+
1211
+ case 20:
1212
+ #line 109 "src/query_parser/q_parser.y"
1213
+ { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
1214
+ break;
1215
+
1216
+ case 21:
1217
+ #line 110 "src/query_parser/q_parser.y"
1218
+ { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
1219
+ break;
1220
+
1221
+ case 22:
1222
+ #line 112 "src/query_parser/q_parser.y"
1223
+ { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
1224
+ break;
1225
+
1226
+ case 23:
1227
+ #line 114 "src/query_parser/q_parser.y"
1228
+ { qp->fields = qp->def_fields; }
1229
+ break;
1230
+
1231
+ case 24:
1232
+ #line 115 "src/query_parser/q_parser.y"
1233
+ { (yyval.query) = (yyvsp[-1].query); }
1234
+ break;
1235
+
1236
+ case 25:
1237
+ #line 116 "src/query_parser/q_parser.y"
1238
+ { qp->fields = qp->all_fields; }
1239
+ break;
1240
+
1241
+ case 26:
1242
+ #line 116 "src/query_parser/q_parser.y"
1243
+ {qp->fields = qp->def_fields;}
1244
+ break;
1245
+
1246
+ case 27:
1247
+ #line 117 "src/query_parser/q_parser.y"
1248
+ { (yyval.query) = (yyvsp[-1].query); }
1249
+ break;
1250
+
1251
+ case 28:
1252
+ #line 119 "src/query_parser/q_parser.y"
1253
+ { (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
1254
+ break;
1255
+
1256
+ case 29:
1257
+ #line 120 "src/query_parser/q_parser.y"
1258
+ { (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
1259
+ break;
1260
+
1261
+ case 30:
1262
+ #line 122 "src/query_parser/q_parser.y"
1263
+ { (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
1264
+ break;
1265
+
1266
+ case 31:
1267
+ #line 123 "src/query_parser/q_parser.y"
1268
+ { (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
1269
+ break;
1270
+
1271
+ case 32:
1272
+ #line 124 "src/query_parser/q_parser.y"
1273
+ { (yyval.query) = NULL; }
1274
+ break;
1275
+
1276
+ case 33:
1277
+ #line 125 "src/query_parser/q_parser.y"
1278
+ { (yyval.query) = NULL; }
1279
+ break;
1280
+
1281
+ case 34:
1282
+ #line 127 "src/query_parser/q_parser.y"
1283
+ { (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
1284
+ break;
1285
+
1286
+ case 35:
1287
+ #line 128 "src/query_parser/q_parser.y"
1288
+ { (yyval.phrase) = ph_first_word(NULL); }
1289
+ break;
1290
+
1291
+ case 36:
1292
+ #line 129 "src/query_parser/q_parser.y"
1293
+ { (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
1294
+ break;
1295
+
1296
+ case 37:
1297
+ #line 130 "src/query_parser/q_parser.y"
1298
+ { (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
1299
+ break;
1300
+
1301
+ case 38:
1302
+ #line 131 "src/query_parser/q_parser.y"
1303
+ { (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
1304
+ break;
1305
+
1306
+ case 39:
1307
+ #line 133 "src/query_parser/q_parser.y"
1308
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
1309
+ break;
1310
+
1311
+ case 40:
1312
+ #line 134 "src/query_parser/q_parser.y"
1313
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
1314
+ break;
1315
+
1316
+ case 41:
1317
+ #line 135 "src/query_parser/q_parser.y"
1318
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
1319
+ break;
1320
+
1321
+ case 42:
1322
+ #line 136 "src/query_parser/q_parser.y"
1323
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
1324
+ break;
1325
+
1326
+ case 43:
1327
+ #line 137 "src/query_parser/q_parser.y"
1328
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
1329
+ break;
1330
+
1331
+ case 44:
1332
+ #line 138 "src/query_parser/q_parser.y"
1333
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
1334
+ break;
1335
+
1336
+ case 45:
1337
+ #line 139 "src/query_parser/q_parser.y"
1338
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
1339
+ break;
1340
+
1341
+ case 46:
1342
+ #line 140 "src/query_parser/q_parser.y"
1343
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
1344
+ break;
1345
+
1346
+ case 47:
1347
+ #line 141 "src/query_parser/q_parser.y"
1348
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
1349
+ break;
1350
+
1351
+ case 48:
1352
+ #line 142 "src/query_parser/q_parser.y"
1353
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
1354
+ break;
1355
+
1356
+ case 49:
1357
+ #line 143 "src/query_parser/q_parser.y"
1358
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
1359
+ break;
1360
+
1361
+ case 50:
1362
+ #line 144 "src/query_parser/q_parser.y"
1363
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
1364
+ break;
1365
+
1366
+
1367
+ }
1368
+
1369
+ /* Line 1037 of yacc.c. */
1370
+ #line 1371 "y.tab.c"
1371
+
1372
+ yyvsp -= yylen;
1373
+ yyssp -= yylen;
1374
+
1375
+
1376
+ YY_STACK_PRINT (yyss, yyssp);
1377
+
1378
+ *++yyvsp = yyval;
1379
+
1380
+
1381
+ /* Now `shift' the result of the reduction. Determine what state
1382
+ that goes to, based on the state we popped back to and the rule
1383
+ number reduced by. */
1384
+
1385
+ yyn = yyr1[yyn];
1386
+
1387
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
1388
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
1389
+ yystate = yytable[yystate];
1390
+ else
1391
+ yystate = yydefgoto[yyn - YYNTOKENS];
1392
+
1393
+ goto yynewstate;
1394
+
1395
+
1396
+ /*------------------------------------.
1397
+ | yyerrlab -- here on detecting error |
1398
+ `------------------------------------*/
1399
+ yyerrlab:
1400
+ /* If not already recovering from an error, report this error. */
1401
+ if (!yyerrstatus)
1402
+ {
1403
+ ++yynerrs;
1404
+ #if YYERROR_VERBOSE
1405
+ yyn = yypact[yystate];
1406
+
1407
+ if (YYPACT_NINF < yyn && yyn < YYLAST)
1408
+ {
1409
+ YYSIZE_T yysize = 0;
1410
+ int yytype = YYTRANSLATE (yychar);
1411
+ const char* yyprefix;
1412
+ char *yymsg;
1413
+ int yyx;
1414
+
1415
+ /* Start YYX at -YYN if negative to avoid negative indexes in
1416
+ YYCHECK. */
1417
+ int yyxbegin = yyn < 0 ? -yyn : 0;
1418
+
1419
+ /* Stay within bounds of both yycheck and yytname. */
1420
+ int yychecklim = YYLAST - yyn;
1421
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
1422
+ int yycount = 0;
1423
+
1424
+ yyprefix = ", expecting ";
1425
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
1426
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
1427
+ {
1428
+ yysize += yystrlen (yyprefix) + yystrlen (yytname [yyx]);
1429
+ yycount += 1;
1430
+ if (yycount == 5)
1431
+ {
1432
+ yysize = 0;
1433
+ break;
1434
+ }
1435
+ }
1436
+ yysize += (sizeof ("syntax error, unexpected ")
1437
+ + yystrlen (yytname[yytype]));
1438
+ yymsg = (char *) YYSTACK_ALLOC (yysize);
1439
+ if (yymsg != 0)
1440
+ {
1441
+ char *yyp = yystpcpy (yymsg, "syntax error, unexpected ");
1442
+ yyp = yystpcpy (yyp, yytname[yytype]);
1443
+
1444
+ if (yycount < 5)
1445
+ {
1446
+ yyprefix = ", expecting ";
1447
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
1448
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
1449
+ {
1450
+ yyp = yystpcpy (yyp, yyprefix);
1451
+ yyp = yystpcpy (yyp, yytname[yyx]);
1452
+ yyprefix = " or ";
1453
+ }
1454
+ }
1455
+ yyerror (qp, yymsg);
1456
+ YYSTACK_FREE (yymsg);
1457
+ }
1458
+ else
1459
+ yyerror (qp, "syntax error; also virtual memory exhausted");
1460
+ }
1461
+ else
1462
+ #endif /* YYERROR_VERBOSE */
1463
+ yyerror (qp, "syntax error");
1464
+ }
1465
+
1466
+
1467
+
1468
+ if (yyerrstatus == 3)
1469
+ {
1470
+ /* If just tried and failed to reuse look-ahead token after an
1471
+ error, discard it. */
1472
+
1473
+ if (yychar <= YYEOF)
1474
+ {
1475
+ /* If at end of input, pop the error token,
1476
+ then the rest of the stack, then return failure. */
1477
+ if (yychar == YYEOF)
1478
+ for (;;)
1479
+ {
1480
+
1481
+ YYPOPSTACK;
1482
+ if (yyssp == yyss)
1483
+ YYABORT;
1484
+ yydestruct ("Error: popping",
1485
+ yystos[*yyssp], yyvsp);
1486
+ }
1487
+ }
1488
+ else
1489
+ {
1490
+ yydestruct ("Error: discarding", yytoken, &yylval);
1491
+ yychar = YYEMPTY;
1492
+ }
1493
+ }
1494
+
1495
+ /* Else will try to reuse look-ahead token after shifting the error
1496
+ token. */
1497
+ goto yyerrlab1;
1498
+
1499
+
1500
+ /*---------------------------------------------------.
1501
+ | yyerrorlab -- error raised explicitly by YYERROR. |
1502
+ `---------------------------------------------------*/
1503
+ yyerrorlab:
1504
+
1505
+ #ifdef __GNUC__
1506
+ /* Pacify GCC when the user code never invokes YYERROR and the label
1507
+ yyerrorlab therefore never appears in user code. */
1508
+ if (0)
1509
+ goto yyerrorlab;
1510
+ #endif
1511
+
1512
+ yyvsp -= yylen;
1513
+ yyssp -= yylen;
1514
+ yystate = *yyssp;
1515
+ goto yyerrlab1;
1516
+
1517
+
1518
+ /*-------------------------------------------------------------.
1519
+ | yyerrlab1 -- common code for both syntax error and YYERROR. |
1520
+ `-------------------------------------------------------------*/
1521
+ yyerrlab1:
1522
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
1523
+
1524
+ for (;;)
1525
+ {
1526
+ yyn = yypact[yystate];
1527
+ if (yyn != YYPACT_NINF)
1528
+ {
1529
+ yyn += YYTERROR;
1530
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
1531
+ {
1532
+ yyn = yytable[yyn];
1533
+ if (0 < yyn)
1534
+ break;
1535
+ }
1536
+ }
1537
+
1538
+ /* Pop the current state because it cannot handle the error token. */
1539
+ if (yyssp == yyss)
1540
+ YYABORT;
1541
+
1542
+
1543
+ yydestruct ("Error: popping", yystos[yystate], yyvsp);
1544
+ YYPOPSTACK;
1545
+ yystate = *yyssp;
1546
+ YY_STACK_PRINT (yyss, yyssp);
1547
+ }
1548
+
1549
+ if (yyn == YYFINAL)
1550
+ YYACCEPT;
1551
+
1552
+ *++yyvsp = yylval;
1553
+
1554
+
1555
+ /* Shift the error token. */
1556
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
1557
+
1558
+ yystate = yyn;
1559
+ goto yynewstate;
1560
+
1561
+
1562
+ /*-------------------------------------.
1563
+ | yyacceptlab -- YYACCEPT comes here. |
1564
+ `-------------------------------------*/
1565
+ yyacceptlab:
1566
+ yyresult = 0;
1567
+ goto yyreturn;
1568
+
1569
+ /*-----------------------------------.
1570
+ | yyabortlab -- YYABORT comes here. |
1571
+ `-----------------------------------*/
1572
+ yyabortlab:
1573
+ yydestruct ("Error: discarding lookahead",
1574
+ yytoken, &yylval);
1575
+ yychar = YYEMPTY;
1576
+ yyresult = 1;
1577
+ goto yyreturn;
1578
+
1579
+ #ifndef yyoverflow
1580
+ /*----------------------------------------------.
1581
+ | yyoverflowlab -- parser overflow comes here. |
1582
+ `----------------------------------------------*/
1583
+ yyoverflowlab:
1584
+ yyerror (qp, "parser stack overflow");
1585
+ yyresult = 2;
1586
+ /* Fall through. */
1587
+ #endif
1588
+
1589
+ yyreturn:
1590
+ #ifndef yyoverflow
1591
+ if (yyss != yyssa)
1592
+ YYSTACK_FREE (yyss);
1593
+ #endif
1594
+ return yyresult;
1595
+ }
1596
+
1597
+
1598
+ #line 146 "src/query_parser/q_parser.y"
1599
+
1600
+
1601
/* Characters yylex() handles as operators instead of starting a word. */
const char *special_char = "&:()[]{}!+\"~^-|<>=*?";

/* Characters that terminate a word in get_word(). Compared with
 * special_char this adds blank and tab and omits the wildcards '*' and '?',
 * which are allowed inside a word. */
const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
1603
+
1604
+ int get_word(YYSTYPE *lvalp, QParser *qp)
1605
+ {
1606
+ bool is_wild = false;
1607
+ int len;
1608
+ char c;
1609
+ char *buf = qp->buf[qp->buf_index];
1610
+ char *bufp = buf;
1611
+ qp->buf_index = (qp->buf_index + 1) % CONC_WORDS;
1612
+
1613
+ qp->qstrp--; /* need to back up one character */
1614
+
1615
+ while (!strchr(not_word, (c=*qp->qstrp++))) {
1616
+ switch (c) {
1617
+ case '\\':
1618
+ if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1619
+ *bufp++ = '\\';
1620
+ } else {
1621
+ *bufp++ = c;
1622
+ qp->qstrp++;
1623
+ }
1624
+ break;
1625
+ case '*': case '?':
1626
+ is_wild = true;
1627
+ default:
1628
+ *bufp++ = c;
1629
+ }
1630
+ }
1631
+ qp->qstrp--;
1632
+ /* check for keywords. There are only four so we have a bit of a hack which
1633
+ * just checks for all of them. */
1634
+ *bufp = '\0';
1635
+ len = bufp - buf;
1636
+ if (len == 3) {
1637
+ if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1638
+ if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1639
+ if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1640
+ }
1641
+ if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1642
+
1643
+ /* found a word so return it. */
1644
+ lvalp->str = buf;
1645
+ if (is_wild) return WILD_STR;
1646
+ return WORD;
1647
+ }
1648
+
1649
+ int yylex(YYSTYPE *lvalp, QParser *qp)
1650
+ {
1651
+ char c, nc;
1652
+
1653
+ while ((c=*qp->qstrp++) == ' ' || c == '\t')
1654
+ ;
1655
+ if (c == '\0')
1656
+ return 0;
1657
+
1658
+ if (strchr(special_char, c)) { /* comment */
1659
+ nc = *qp->qstrp;
1660
+ switch (c) {
1661
+ case '-': case '!': return NOT;
1662
+ case '+': return REQ;
1663
+ case '*':
1664
+ if (nc == ':') return c;
1665
+ break;
1666
+ case '&':
1667
+ if (nc == '&') {
1668
+ qp->qstrp++;
1669
+ return AND;
1670
+ }
1671
+ break; /* Don't return single & character. Use in word. */
1672
+ case '|':
1673
+ if (nc == '|') {
1674
+ qp->qstrp++;
1675
+ return OR;
1676
+ }
1677
+ default:
1678
+ return c;
1679
+ }
1680
+ }
1681
+
1682
+ return get_word(lvalp, qp);
1683
+ }
1684
+
1685
+ int yyerror(QParser *qp, char const *msg)
1686
+ {
1687
+ if (!qp->handle_parse_errors) {
1688
+ if (qp->clean_str) free(qp->qstr);
1689
+ eprintf(PARSE_ERROR, msg);
1690
+ }
1691
+ return 0;
1692
+ }
1693
+
1694
+
1695
+ Query *get_bool_q(Array *bclauses)
1696
+ {
1697
+ Query *q;
1698
+ BooleanQuery *bq;
1699
+ BooleanClause *bc;
1700
+
1701
+ if (bclauses->size == 0) {
1702
+ ary_destroy(bclauses);
1703
+ q = NULL;
1704
+ } else if (bclauses->size == 1) {
1705
+ bc = (BooleanClause *)bclauses->elems[0];
1706
+ q = bc->query;
1707
+ free(bc);
1708
+ ary_destroy(bclauses);
1709
+ } else {
1710
+ q = bq_create(false);
1711
+ /* copy clauses into query */
1712
+ bq = (BooleanQuery *)q->data;
1713
+ bq->clause_cnt = bclauses->size;
1714
+ bq->clause_capa = bclauses->allocated;
1715
+ free(bq->clauses);
1716
+ bq->clauses = (BooleanClause **)bclauses->elems;
1717
+ free(bclauses);
1718
+ }
1719
+ return q;
1720
+ }
1721
+
1722
+
1723
+ Array *first_cls(BooleanClause *cls)
1724
+ {
1725
+ Array *clauses = ary_create(0, NULL);
1726
+ if (cls) ary_append(clauses, cls);
1727
+ return clauses;
1728
+ }
1729
+
1730
+ Array *add_and_cls(Array *clauses, BooleanClause *clause)
1731
+ {
1732
+ if (clause) {
1733
+ BooleanClause *last_cl;
1734
+ if (clauses->size == 1) {
1735
+ last_cl = clauses->elems[0];
1736
+ if (!last_cl->is_prohibited) bc_set_occur(last_cl, BC_MUST);
1737
+ }
1738
+
1739
+ if (!clause->is_prohibited) bc_set_occur(clause, BC_MUST);
1740
+ ary_append(clauses, clause);
1741
+ }
1742
+ return clauses;
1743
+ }
1744
+
1745
+ Array *add_or_cls(Array *clauses, BooleanClause *clause)
1746
+ {
1747
+ if (clause) ary_append(clauses, clause);
1748
+ return clauses;
1749
+ }
1750
+
1751
+ Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *clause)
1752
+ {
1753
+ if (qp->or_default) {
1754
+ add_or_cls(clauses, clause);
1755
+ } else {
1756
+ add_and_cls(clauses, clause);
1757
+ }
1758
+ return clauses;
1759
+ }
1760
+
1761
+ BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1762
+ {
1763
+ if (q) return bc_create(q, occur);
1764
+ else return NULL;
1765
+ }
1766
+
1767
+ Query *get_term_q(QParser *qp, char *field, char *word)
1768
+ {
1769
+ Query *q;
1770
+ Token *token;
1771
+ TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1772
+
1773
+ if ((token = ts_next(stream)) == NULL) {
1774
+ q = NULL;
1775
+ } else {
1776
+ Term *term = term_create(field, token->text);
1777
+ if ((token = ts_next(stream)) == NULL) {
1778
+ q = tq_create(term);
1779
+ } else {
1780
+ q = phq_create();
1781
+ phq_add_term(q, term, 0);
1782
+ do {
1783
+ phq_add_term(q, term_create(field, token->text), token->pos_inc);
1784
+ } while ((token = ts_next(stream)) != NULL);
1785
+ }
1786
+ }
1787
+ return q;
1788
+ }
1789
+
1790
+ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1791
+ {
1792
+ Query *q;
1793
+ Token *token;
1794
+ TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1795
+
1796
+ if ((token = ts_next(stream)) == NULL) {
1797
+ q = NULL;
1798
+ } else {
1799
+ /* it only makes sense to find one term in a fuzzy query */
1800
+ Term *term = term_create(field, token->text);
1801
+ if (slop_str) {
1802
+ float slop;
1803
+ sscanf(slop_str, "%f", &slop);
1804
+ q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
1805
+ } else {
1806
+ q = fuzq_create(term);
1807
+ }
1808
+ }
1809
+ return q;
1810
+ }
1811
+
1812
+ Query *get_wild_q(QParser *qp, char *field, char *pattern)
1813
+ {
1814
+ if (qp->wild_lower) lower_str(pattern);
1815
+ return wcq_create(term_create(field, pattern));;
1816
+ }
1817
+
1818
+ HashSet *add_field(QParser *qp, char *field)
1819
+ {
1820
+ char *orig_field;
1821
+ if ((orig_field = hs_orig(qp->all_fields, field)) != NULL) {
1822
+ hs_add(qp->fields, orig_field);
1823
+ } else if (qp->allow_any_fields) {
1824
+ field = estrdup(field);
1825
+ hs_add(qp->all_fields, field);
1826
+ hs_add(qp->fields, field);
1827
+ }
1828
+ return qp->fields;
1829
+ }
1830
+
1831
+ HashSet *first_field(QParser *qp, char *field)
1832
+ {
1833
+ qp->fields = qp->fields_buf;
1834
+ qp->fields->size = 0;
1835
+ h_clear(qp->fields->ht);
1836
+ return add_field(qp, field);
1837
+ }
1838
+
1839
+ void ph_destroy(Phrase *self)
1840
+ {
1841
+ int i, j;
1842
+ for (i = 0; i < self->cnt; i++) {
1843
+ for (j = 0; j < self->w_cnt[i]; j++) {
1844
+ free(self->words[i][j]);
1845
+ }
1846
+ free(self->words[i]);
1847
+ }
1848
+ free(self->words);
1849
+ free(self->w_cnt);
1850
+ free(self->w_capa);
1851
+ free(self);
1852
+ }
1853
+
1854
+
1855
+ Phrase *ph_create()
1856
+ {
1857
+ Phrase *self = ALLOC(Phrase);
1858
+ self->cnt = 0;
1859
+ self->capa = PHRASE_INIT_CAPA;
1860
+ self->words = ALLOC_N(char **, PHRASE_INIT_CAPA);
1861
+ self->w_cnt = ALLOC_N(int, PHRASE_INIT_CAPA);
1862
+ self->w_capa = ALLOC_N(int, PHRASE_INIT_CAPA);
1863
+ return self;
1864
+ }
1865
+
1866
+ Phrase *ph_first_word(char *word)
1867
+ {
1868
+ Phrase *self = ph_create();
1869
+ if (word) { /* no point in adding NULL in start */
1870
+ self->words[0] = ALLOC(char *);
1871
+ self->words[0][0] = estrdup(word);
1872
+ self->w_cnt[0] = self->w_capa[0] = 1;
1873
+ self->cnt = 1;
1874
+ }
1875
+ return self;
1876
+ }
1877
+
1878
+ Phrase *ph_add_word(Phrase *self, char *word)
1879
+ {
1880
+ int i;
1881
+ if (self->cnt == self->capa) {
1882
+ self->capa <<= 1;
1883
+ REALLOC_N(self->words, char **, self->capa);
1884
+ REALLOC_N(self->w_cnt, int, self->capa);
1885
+ REALLOC_N(self->w_capa, int, self->capa);
1886
+ }
1887
+ i = self->cnt;
1888
+ self->cnt++;
1889
+ self->words[i] = ALLOC(char *);
1890
+ self->words[i][0] = word ? estrdup(word) : NULL;
1891
+ self->w_cnt[i] = self->w_capa[i] = 1;
1892
+ return self;
1893
+ }
1894
+
1895
+ Phrase *ph_add_multi_word(Phrase *self, char *word)
1896
+ {
1897
+ int i = self->cnt - 1;
1898
+
1899
+ if (!word) return self; /* no point in adding NULL in multi */
1900
+
1901
+ if (self->w_cnt[i] >= self->w_capa[i]) {
1902
+ self->w_capa[i] <<= 1;
1903
+ REALLOC_N(self->words[i], char *, self->w_capa[i]);
1904
+ }
1905
+ self->words[i][self->w_cnt[i]] = estrdup(word);
1906
+ self->w_cnt[i]++;
1907
+ return self;
1908
+ }
1909
+
1910
+ Query *get_normal_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
1911
+ {
1912
+ int pos_inc = 0;
1913
+ int i;
1914
+ Token *token;
1915
+ TokenStream *stream;
1916
+ char *word;
1917
+
1918
+ Query *pq = phq_create();
1919
+ ((PhraseQuery *)pq->data)->slop = slop;
1920
+
1921
+ for (i = 0; i < phrase->cnt; i++) {
1922
+ word = phrase->words[i][0];
1923
+ if (!word) {
1924
+ pos_inc++;
1925
+ } else {
1926
+ stream = a_get_ts(qp->analyzer, field, word);
1927
+ while ((token = ts_next(stream))) {
1928
+ phq_add_term(pq, term_create(field, token->text),
1929
+ token->pos_inc + pos_inc);
1930
+ pos_inc = 0;
1931
+ }
1932
+ }
1933
+ }
1934
+ return pq;
1935
+ }
1936
+
1937
+ Query *get_multi_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
1938
+ {
1939
+ int i, j;
1940
+ int pos_inc = 0;
1941
+ Token *token;
1942
+ TokenStream *stream;
1943
+ char *word;
1944
+ Term **terms = NULL;
1945
+ int t_cnt;
1946
+
1947
+ Query *mpq = mphq_create();
1948
+ ((MultiPhraseQuery *)mpq->data)->slop = slop;
1949
+
1950
+ for (i = 0; i < phrase->cnt; i++) {
1951
+ word = phrase->words[i][0];
1952
+ if (!word) {
1953
+ pos_inc++;
1954
+ } else {
1955
+ t_cnt = phrase->w_cnt[i];
1956
+ if (t_cnt > 1) {
1957
+ terms = ALLOC_N(Term *, t_cnt);
1958
+ for (j = 0; j < t_cnt; j++) {
1959
+ word = phrase->words[i][j];
1960
+ stream = a_get_ts(qp->analyzer, field, word);
1961
+ if ((token = ts_next(stream))) {
1962
+ terms[j] = term_create(field, token->text);
1963
+ } else {
1964
+ t_cnt--; j--;
1965
+ }
1966
+ }
1967
+ /* must advance at least one */
1968
+ mphq_add_terms(mpq, terms, t_cnt, pos_inc + 1);
1969
+ } else {
1970
+ stream = a_get_ts(qp->analyzer, field, word);
1971
+ while ((token = ts_next(stream))) {
1972
+ terms = ALLOC(Term *);
1973
+ terms[0] = term_create(field, token->text);
1974
+ mphq_add_terms(mpq, terms, 1, token->pos_inc + pos_inc);
1975
+ pos_inc = 0;
1976
+ }
1977
+ }
1978
+ }
1979
+ }
1980
+ return mpq;
1981
+ }
1982
+
1983
+ Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
1984
+ {
1985
+ Query *q;
1986
+ int i, j;
1987
+ int slop;
1988
+
1989
+ if (phrase->cnt == 0) {
1990
+ q = NULL;
1991
+ } else if (phrase->cnt == 1) {
1992
+ if (phrase->w_cnt[0] == 1) {
1993
+ FLDS(q, get_term_q(qp, field, phrase->words[0][0]));
1994
+ } else {
1995
+ Query *bq;
1996
+ q = bq_create(false);
1997
+ for (j = 0; j < phrase->w_cnt[0]; j++) {
1998
+ FLDS(bq, tq_create(term_create(field, phrase->words[0][j])));
1999
+ if (bq) bq_add_query(q, bq, BC_SHOULD);
2000
+ }
2001
+ }
2002
+ } else {
2003
+ bool multi_phrase = false;
2004
+ for (i = 0; i < phrase->cnt; i++) {
2005
+ if (phrase->w_cnt[i] > 1) multi_phrase = true;
2006
+ }
2007
+ slop = qp->def_slop;
2008
+ if (slop_str) sscanf(slop_str, "%d", &slop);
2009
+ if (multi_phrase) {
2010
+ FLDS(q, get_multi_phrase_query(qp, field, phrase, slop));
2011
+ } else {
2012
+ FLDS(q, get_normal_phrase_query(qp, field, phrase, slop));
2013
+ }
2014
+ }
2015
+ ph_destroy(phrase);
2016
+ return q;
2017
+ }
2018
+
2019
+ Query *get_range_q(char *field, char *from, char *to, bool inc_lower, bool inc_upper)
2020
+ {
2021
+ return rq_create(field, from, to, inc_lower, inc_upper);
2022
+ }
2023
+
2024
+ void qp_destroy(void *p)
2025
+ {
2026
+ QParser *self = (QParser *)p;
2027
+ if (self->close_def_fields) hs_destroy_all(self->def_fields);
2028
+ hs_destroy_all(self->all_fields);
2029
+ hs_destroy(self->fields_buf);
2030
+ free(self);
2031
+ }
2032
+
2033
+ QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2034
+ {
2035
+ int i;
2036
+ QParser *self = ALLOC(QParser);
2037
+ self->or_default = true;
2038
+ self->wild_lower = true;
2039
+ self->clean_str = false;
2040
+ self->handle_parse_errors = false;
2041
+ self->allow_any_fields = false;
2042
+ self->def_slop = 0;
2043
+ self->fields_buf = hs_str_create(NULL);
2044
+ self->all_fields = all_fields;
2045
+ if (def_fields) {
2046
+ self->def_fields = def_fields;
2047
+ for (i = 0; i < self->def_fields->size; i++) {
2048
+ if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2049
+ hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2050
+ }
2051
+ }
2052
+ self->close_def_fields = true;
2053
+ } else {
2054
+ self->def_fields = all_fields;
2055
+ self->close_def_fields = false;
2056
+ }
2057
+ self->fields = self->def_fields;
2058
+ /* make sure all_fields contains the default fields */
2059
+ self->analyzer = analyzer;
2060
+ self->buf_index = 0;
2061
+ return self;
2062
+ }
2063
+
2064
/* Characters that keep a meaning inside a quoted phrase; qp_clean_str()
 * re-emits the backslash in front of them when they were escaped there. */
static const char *PHRASE_CHARS = "<>|\"";
2066
+
2067
/* Insert chr in front of the first len characters of str by shifting them
 * one place to the right. The caller must provide room for len + 1
 * characters; no terminator is written. */
void str_insert(char *str, int len, char chr)
{
    int k;

    /* shift from the back so no character is overwritten before it moves */
    for (k = len; k > 0; k--) {
        str[k] = str[k - 1];
    }
    str[0] = chr;
}
2072
+
2073
+ char *qp_clean_str(char *str)
2074
+ {
2075
+ int b, pb = -1;
2076
+ int br_cnt = 0;
2077
+ bool quote_open = false;
2078
+ char *sp, *nsp;
2079
+
2080
+ /* leave a little extra */
2081
+ char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2082
+
2083
+ for (sp = str, nsp = new_str; *sp; sp++) {
2084
+ b = *sp;
2085
+ /* ignore escaped characters */
2086
+ if (pb == '\\') {
2087
+ if (quote_open && index(PHRASE_CHARS, b)) {
2088
+ *nsp++ = '\\'; /* this was left off the first time through */
2089
+ }
2090
+
2091
+ *nsp++ = b;
2092
+ /* \\ has escaped itself so has no power. Assign pb random char : */
2093
+ pb = ((b == '\\') ? ':' : b);
2094
+ continue;
2095
+ }
2096
+ switch (b) {
2097
+ case '\\':
2098
+ if (!quote_open) /* We do our own escaping below */
2099
+ *nsp++ = b;
2100
+ break;
2101
+ case '"':
2102
+ quote_open = !quote_open;
2103
+ *nsp++ = b;
2104
+ break;
2105
+ case '(':
2106
+ if (!quote_open) {
2107
+ br_cnt++;
2108
+ } else {
2109
+ *nsp++ = '\\';
2110
+ }
2111
+ *nsp++ = b;
2112
+ break;
2113
+ case ')':
2114
+ if (!quote_open) {
2115
+ if (br_cnt == 0) {
2116
+ str_insert(new_str, nsp - new_str, '(');
2117
+ nsp++;
2118
+ } else {
2119
+ br_cnt--;
2120
+ }
2121
+ } else {
2122
+ *nsp++ = '\\';
2123
+ }
2124
+ *nsp++ = b;
2125
+ break;
2126
+ case '>':
2127
+ if (quote_open) {
2128
+ if (pb == '<') {
2129
+ /* remove the escape character */
2130
+ nsp--;
2131
+ nsp[-1] = '<';
2132
+ } else {
2133
+ *nsp++ = '\\';
2134
+ }
2135
+ }
2136
+ *nsp++ = b;
2137
+ break;
2138
+ default:
2139
+ if (quote_open) {
2140
+ if (index(special_char, b) && b != '|') {
2141
+ *nsp++ = '\\';
2142
+ }
2143
+ }
2144
+ *nsp++ = b;
2145
+ }
2146
+ pb = b;
2147
+ }
2148
+ if (quote_open) *nsp++ = '"';
2149
+ for (;br_cnt > 0; br_cnt--) {
2150
+ *nsp++ = ')';
2151
+ }
2152
+ *nsp = '\0';
2153
+ return new_str;
2154
+ }
2155
+
2156
+ Query *qp_get_bad_query(QParser *qp, char *str)
2157
+ {
2158
+ Query *q;
2159
+ FLDS(q, get_term_q(qp, field, str));
2160
+ return q;
2161
+ }
2162
+
2163
+ Query *qp_parse(QParser *self, char *qstr)
2164
+ {
2165
+ if (self->clean_str) {
2166
+ self->qstrp = self->qstr = qp_clean_str(qstr);
2167
+ } else {
2168
+ self->qstrp = self->qstr = qstr;
2169
+ }
2170
+ self->fields = self->def_fields;
2171
+ self->result = NULL;
2172
+ yyparse(self);
2173
+ if (!self->result && self->handle_parse_errors)
2174
+ self->result = qp_get_bad_query(self, self->qstr);
2175
+ if (!self->result) self->result = bq_create(false);
2176
+ if (self->clean_str) free(self->qstr);
2177
+ return self->result;
2178
+ }
2179
+
2180
+