ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -0,0 +1,343 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /***************************************************************************
5
+ *
6
+ * MultiPhraseWeight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *mphw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("MultiPhraseWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
+ Scorer *mphw_scorer(Weight *self, IndexReader *ir)
18
+ {
19
+ Scorer *phsc;
20
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
21
+ int i;
22
+ if (mphq->t_cnt == 0) return NULL; // optimize zero-term case
23
+
24
+ TermDocEnum **tps = ALLOC_N(TermDocEnum *, mphq->t_cnt);
25
+
26
+ for (i = 0; i < mphq->t_cnt; i++) {
27
+ if (mphq->pt_cnt[i] == 1) {
28
+ tps[i] = ir_term_positions_for(ir, mphq->terms[i][0]);
29
+ } else {
30
+ tps[i] = mtdpe_create(ir, mphq->terms[i], mphq->pt_cnt[i]);
31
+ }
32
+ if (tps[i] == NULL) {
33
+ // free everything we just created and return NULL
34
+ int j;
35
+ for (j = 0; j < i; j++) {
36
+ tps[i]->close(tps[i]);
37
+ }
38
+ free(tps);
39
+ return NULL;
40
+ }
41
+ }
42
+
43
+ if (mphq->slop == 0) { // optimize exact case
44
+ phsc = exact_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
45
+ self->similarity,
46
+ ir->get_norms(ir, mphq->field));
47
+ } else {
48
+ phsc = sloppy_phrase_scorer_create(self, tps, mphq->positions, mphq->t_cnt,
49
+ self->similarity,
50
+ mphq->slop,
51
+ ir->get_norms(ir, mphq->field));
52
+ }
53
+ free(tps);
54
+ return phsc;
55
+ }
56
+
57
+ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
58
+ {
59
+ char *query_str = self->query->to_s(self->query, "");
60
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->query->data;
61
+ int i, j;
62
+ char *doc_freqs = NULL;
63
+ int len = 0, pos = 0;
64
+
65
+ Explanation *expl = expl_create(0.0,
66
+ epstrdup("weight(%s in %d), product of:",
67
+ strlen(query_str) + 20,
68
+ query_str, doc_num));
69
+
70
+ for (i = 0; i < mphq->t_cnt; i++) {
71
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
72
+ len += strlen(mphq->terms[i][j]->text) + 30;
73
+ }
74
+ }
75
+ doc_freqs = ALLOC_N(char, len);
76
+ for (i = 0; i < mphq->t_cnt; i++) {
77
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
78
+ Term *term = mphq->terms[i][j];
79
+ sprintf(doc_freqs + pos, "%s=%d, ", term->text, ir->doc_freq(ir, term));
80
+ pos += strlen(doc_freqs + pos);
81
+ }
82
+ }
83
+ pos -= 2; // remove ", " from the end
84
+ doc_freqs[pos] = 0;
85
+
86
+ Explanation *idf_expl1 = expl_create(self->idf,
87
+ epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
88
+ Explanation *idf_expl2 = expl_create(self->idf,
89
+ epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
90
+ free(doc_freqs);
91
+
92
+ // explain query weight
93
+ Explanation *query_expl = expl_create(0.0,
94
+ epstrdup("query_weight(%s), product of:", strlen(query_str), query_str));
95
+
96
+ if (self->query->boost != 1.0) {
97
+ expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
98
+ }
99
+ expl_add_detail(query_expl, idf_expl1);
100
+
101
+ Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
102
+ expl_add_detail(query_expl, qnorm_expl);
103
+
104
+ query_expl->value = self->query->boost * self->idf * self->qnorm;
105
+
106
+ expl_add_detail(expl, query_expl);
107
+
108
+ // explain field weight
109
+ Explanation *field_expl = expl_create(0.0,
110
+ epstrdup("field_weight(%s in %d), product of:",
111
+ strlen(query_str) + 20, query_str, doc_num));
112
+ free(query_str);
113
+
114
+ Scorer *scorer = self->scorer(self, ir);
115
+ Explanation *tf_expl = scorer->explain(scorer, doc_num);
116
+ scorer->destroy(scorer);
117
+ expl_add_detail(field_expl, tf_expl);
118
+ expl_add_detail(field_expl, idf_expl2);
119
+
120
+ uchar *field_norms = ir->get_norms(ir, mphq->field);
121
+ float field_norm = (field_norms != NULL)
122
+ ? sim_decode_norm(self->similarity, field_norms[doc_num])
123
+ : 0.0;
124
+ Explanation *field_norm_expl = expl_create(field_norm,
125
+ epstrdup("field_norm(field=%s, doc=%d)",
126
+ strlen(mphq->field) + 20, mphq->field, doc_num));
127
+
128
+ expl_add_detail(field_expl, field_norm_expl);
129
+
130
+ field_expl->value = tf_expl->value * self->idf * field_norm;
131
+
132
+ // combine them
133
+ if (query_expl->value == 1.0) {
134
+ expl_destoy(expl);
135
+ return field_expl;
136
+ } else {
137
+ expl->value = (query_expl->value * field_expl->value);
138
+ expl_add_detail(expl, field_expl);
139
+ return expl;
140
+ }
141
+ }
142
+
143
+
144
+
145
+
146
+ Weight *mphw_create(Query *query, Searcher *searcher)
147
+ {
148
+ MultiPhraseQuery *mphq = (MultiPhraseQuery *)query->data;
149
+ Weight *self = ALLOC(Weight);
150
+ int i, j;
151
+ ZEROSET(self, Weight, 1);
152
+ self->get_query = &w_get_query;
153
+ self->get_value = &w_get_value;
154
+ self->normalize = &w_normalize;
155
+ self->scorer = &mphw_scorer;
156
+ self->explain = &mphw_explain;
157
+ self->to_s = &mphw_to_s;
158
+ self->destroy = &free;
159
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
160
+
161
+ self->similarity = query->get_similarity(query, searcher);
162
+ self->query = query;
163
+ self->value = query->boost;
164
+
165
+ self->idf = 0.0;
166
+ for (i = 0; i < mphq->t_cnt; i++) {
167
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
168
+ self->idf += sim_idf_term(self->similarity, mphq->terms[i][j], searcher);
169
+ }
170
+ }
171
+
172
+ return self;
173
+ }
174
+
175
+ /***************************************************************************
176
+ *
177
+ * MultiPhraseQuery
178
+ *
179
+ ***************************************************************************/
180
+
181
+ #define GET_MPHQ MultiPhraseQuery *mphq = (MultiPhraseQuery *)self->data
182
+
183
+ /**
184
+ * NOTE: terms must be allocated and it will be freed when the query is
185
+ * destroyed.
186
+ */
187
+ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
188
+ {
189
+ GET_MPHQ;
190
+ int position, index = mphq->t_cnt;
191
+ if (index >= mphq->t_capa) {
192
+ mphq->t_capa <<= 1;
193
+ REALLOC_N(mphq->terms, Term **, mphq->t_capa);
194
+ REALLOC_N(mphq->positions, int, mphq->t_capa);
195
+ REALLOC_N(mphq->pt_cnt, int, mphq->t_capa);
196
+ }
197
+ if (index == 0) {
198
+ position = 0;
199
+ mphq->field = terms[0]->field;
200
+ } else {
201
+ int i;
202
+ position = mphq->positions[index - 1] + pos_inc;
203
+ for (i = 0; i < t_cnt; i++) {
204
+ if (strcmp(terms[i]->field, mphq->field) != 0) {
205
+ eprintf(ARG_ERROR, "All phrase terms must be in the same field. Current phrase is %s, tried to add %s\n", mphq->field, terms[i]->field);
206
+ }
207
+ }
208
+ }
209
+
210
+ mphq->terms[index] = terms;
211
+ mphq->pt_cnt[index] = t_cnt;
212
+ mphq->positions[index] = position;
213
+ mphq->t_cnt++;
214
+ }
215
+
216
+ void mphq_destroy(void *p)
217
+ {
218
+ Query *self = (Query *)p;
219
+
220
+ GET_MPHQ;
221
+ int i, j;
222
+ if (self->destroy_all) {
223
+ for (i = 0; i < mphq->t_cnt; i++) {
224
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
225
+ term_destroy(mphq->terms[i][j]);
226
+ }
227
+ free(mphq->terms[i]);
228
+ }
229
+ }
230
+ free(mphq->terms);
231
+ free(mphq->positions);
232
+ free(mphq->pt_cnt);
233
+ free(mphq);
234
+
235
+ q_destroy(self);
236
+ }
237
+
238
+ void mphq_extract_terms(Query *self, Array *terms)
239
+ {
240
+ GET_MPHQ;
241
+ int i, j;
242
+ for (i = 0; i < mphq->t_cnt; i++) {
243
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
244
+ ary_append(terms, mphq->terms[i][j]);
245
+ }
246
+ }
247
+ }
248
+
249
+ char *mphq_to_s(Query *self, char *field)
250
+ {
251
+ GET_MPHQ;
252
+ int i, j, buf_index = 0, len = 0, pos, last_pos = -1;
253
+ char *buffer;
254
+ if (!mphq->t_cnt) return NULL;
255
+ len = strlen(mphq->field) + 1;
256
+ for (i = 0; i < mphq->t_cnt; i++) {
257
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
258
+ len += strlen(mphq->terms[i][j]->text) + 1;
259
+ }
260
+ }
261
+
262
+ // add space for extra characters and boost and slop
263
+ len += 100 + 3 * mphq->positions[mphq->t_cnt - 1];
264
+
265
+ buffer = ALLOC_N(char, len);
266
+
267
+ if (strcmp(field, mphq->field) != 0) {
268
+ len = strlen(mphq->field);
269
+ memcpy(buffer, mphq->field, len);
270
+ buffer[len] = ':';
271
+ buf_index += len + 1;
272
+ }
273
+ buffer[buf_index++] = '"';
274
+
275
+ for (i = 0; i < mphq->t_cnt; i++) {
276
+ pos = mphq->positions[i];
277
+ for (j = last_pos; j < pos - 1; j++) {
278
+ memcpy(buffer + buf_index, "<> ", 3);
279
+ buf_index += 3;
280
+ }
281
+ last_pos = pos;
282
+
283
+ for (j = 0; j < mphq->pt_cnt[i]; j++) {
284
+ Term *term = mphq->terms[i][j];
285
+ len = strlen(term->text);
286
+ memcpy(buffer + buf_index, term->text, len);
287
+ buf_index += len;
288
+ buffer[buf_index++] = '|';
289
+ }
290
+ buffer[buf_index-1] = ' '; /* change last '|' to ' ' */
291
+ }
292
+
293
+ if (buffer[buf_index-1] == ' ') buf_index--;
294
+ buffer[buf_index++] = '"';
295
+ buffer[buf_index] = 0;
296
+ if (mphq->slop != 0) {
297
+ sprintf(buffer + buf_index, "~%d", mphq->slop);
298
+ buf_index += strlen(buffer + buf_index);
299
+ }
300
+ if (self->boost != 1.0) {
301
+ buffer[buf_index] = '^';
302
+ dbl_to_s(buffer + buf_index + 1, self->boost);
303
+ }
304
+ return buffer;
305
+ }
306
+
307
+ Query *mphq_rewrite(Query *self, IndexReader *ir)
308
+ {
309
+ GET_MPHQ;
310
+ if (mphq->t_cnt == 1) { // optimize one-term case
311
+ Term **terms = mphq->terms[0];
312
+ Query *bq = bq_create(true);
313
+ int i;
314
+ for (i = 0; i < mphq->pt_cnt[0]; i++) {
315
+ bq_add_query(bq, tq_create(term_clone(terms[i])), BC_SHOULD);
316
+ }
317
+ bq->boost = self->boost;
318
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
319
+ return self->rewritten = bq;
320
+ } else {
321
+ return self;
322
+ }
323
+ }
324
+
325
+ Query *mphq_create()
326
+ {
327
+ Query *self = q_create();
328
+ MultiPhraseQuery *mphq = ALLOC(MultiPhraseQuery);
329
+ ZEROSET(mphq, MultiPhraseQuery, 1);
330
+ mphq->t_capa = PHQ_INIT_CAPA;
331
+ mphq->terms = ALLOC_N(Term **, PHQ_INIT_CAPA);
332
+ mphq->positions = ALLOC_N(int, PHQ_INIT_CAPA);
333
+ mphq->pt_cnt = ALLOC_N(int, PHQ_INIT_CAPA);
334
+ self->data = mphq;
335
+
336
+ self->create_weight = &mphw_create;
337
+ self->extract_terms = &mphq_extract_terms;
338
+ self->to_s = &mphq_to_s;
339
+ self->destroy = &mphq_destroy;
340
+ self->rewrite = &mphq_rewrite;
341
+ self->type = PHRASE_QUERY;
342
+ return self;
343
+ }
data/ext/q_parser.c ADDED
@@ -0,0 +1,2180 @@
1
+ /* A Bison parser, made by GNU Bison 2.0. */
2
+
3
+ /* Skeleton parser for Yacc-like parsing with Bison,
4
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5
+
6
+ This program is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2, or (at your option)
9
+ any later version.
10
+
11
+ This program is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this program; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place - Suite 330,
19
+ Boston, MA 02111-1307, USA. */
20
+
21
+ /* As a special exception, when this file is copied by Bison into a
22
+ Bison output file, you may use that output file without restriction.
23
+ This special exception was added by the Free Software Foundation
24
+ in version 1.24 of Bison. */
25
+
26
+ /* Written by Richard Stallman by simplifying the original so called
27
+ ``semantic'' parser. */
28
+
29
+ /* All symbols defined below should begin with yy or YY, to avoid
30
+ infringing on user name space. This should be done even for local
31
+ variables, as they might otherwise be expanded by user macros.
32
+ There are some unavoidable exceptions within include files to
33
+ define necessary library symbols; they are noted "INFRINGES ON
34
+ USER NAME SPACE" below. */
35
+
36
+ /* Identify Bison output. */
37
+ #define YYBISON 1
38
+
39
+ /* Skeleton name. */
40
+ #define YYSKELETON_NAME "yacc.c"
41
+
42
+ /* Pure parsers. */
43
+ #define YYPURE 1
44
+
45
+ /* Using locations. */
46
+ #define YYLSP_NEEDED 0
47
+
48
+
49
+
50
+ /* Tokens. */
51
+ #ifndef YYTOKENTYPE
52
+ # define YYTOKENTYPE
53
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
54
+ know about them. */
55
+ enum yytokentype {
56
+ WORD = 258,
57
+ WILD_STR = 259,
58
+ LOW = 260,
59
+ OR = 261,
60
+ AND = 262,
61
+ NOT = 263,
62
+ REQ = 264,
63
+ HIGH = 265
64
+ };
65
+ #endif
66
+ #define WORD 258
67
+ #define WILD_STR 259
68
+ #define LOW 260
69
+ #define OR 261
70
+ #define AND 262
71
+ #define NOT 263
72
+ #define REQ 264
73
+ #define HIGH 265
74
+
75
+
76
+
77
+
78
+ /* Copy the first part of user declarations. */
79
+ #line 1 "src/query_parser/q_parser.y"
80
+
81
+ #include <string.h>
82
+ #include "search.h"
83
+
84
+ typedef struct Phrase {
85
+ int cnt;
86
+ int capa;
87
+ char ***words;
88
+ int *w_cnt;
89
+ int *w_capa;
90
+ } Phrase;
91
+
92
+
93
+
94
+ /* Enabling traces. */
95
+ #ifndef YYDEBUG
96
+ # define YYDEBUG 0
97
+ #endif
98
+
99
+ /* Enabling verbose error messages. */
100
+ #ifdef YYERROR_VERBOSE
101
+ # undef YYERROR_VERBOSE
102
+ # define YYERROR_VERBOSE 1
103
+ #else
104
+ # define YYERROR_VERBOSE 0
105
+ #endif
106
+
107
+ #if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
108
+ #line 14 "src/query_parser/q_parser.y"
109
+ typedef union YYSTYPE {
110
+ Query *query;
111
+ BooleanClause *bcls;
112
+ Array *array;
113
+ HashSet *hashset;
114
+ Phrase *phrase;
115
+ char *str;
116
+ } YYSTYPE;
117
+ /* Line 190 of yacc.c. */
118
+ #line 119 "y.tab.c"
119
+ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
120
+ # define YYSTYPE_IS_DECLARED 1
121
+ # define YYSTYPE_IS_TRIVIAL 1
122
+ #endif
123
+
124
+
125
+
126
+ /* Copy the second part of user declarations. */
127
+ #line 22 "src/query_parser/q_parser.y"
128
+
129
+ int yylex(YYSTYPE *lvalp, QParser *qp);
130
+ int yyerror(QParser *qp, char const *msg);
131
+
132
+ #define PHRASE_INIT_CAPA 4
133
+ Query *get_bool_q(Array *bclauses);
134
+
135
+ Array *first_cls(BooleanClause *cls);
136
+ Array *add_and_cls(Array *clauses, BooleanClause *cls);
137
+ Array *add_or_cls(Array *clauses, BooleanClause *cls);
138
+ Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *cls);
139
+
140
+ BooleanClause *get_bool_cls(Query *q, unsigned int occur);
141
+
142
+ Query *get_term_q(QParser *qp, char *field, char *word);
143
+ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
144
+ Query *get_wild_q(QParser *qp, char *field, char *pattern);
145
+
146
+ HashSet *first_field(QParser *qp, char *field);
147
+ HashSet *add_field(QParser *qp, char *field);
148
+
149
+ Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
150
+
151
+ Phrase *ph_first_word(char *word);
152
+ Phrase *ph_add_word(Phrase *self, char *word);
153
+ Phrase *ph_add_multi_word(Phrase *self, char *word);
154
+
155
+ Query *get_range_q(char *field, char *from, char *to,
156
+ bool inc_lower, bool inc_upper);
157
+
158
+ #define FLDS(q, func) do {\
159
+ char *field;\
160
+ if (qp->fields->size == 0) {\
161
+ q = NULL;\
162
+ } else if (qp->fields->size == 1) {\
163
+ field = (char *)qp->fields->elems[0];\
164
+ q = func;\
165
+ } else {\
166
+ int i;Query *sq;\
167
+ q = bq_create(false);\
168
+ for (i = 0; i < qp->fields->size; i++) {\
169
+ field = (char *)qp->fields->elems[i];\
170
+ sq = func;\
171
+ if (sq) bq_add_query(q, sq, BC_SHOULD);\
172
+ }\
173
+ }\
174
+ } while (0)
175
+
176
+
177
+ /* Line 213 of yacc.c. */
178
+ #line 179 "y.tab.c"
179
+
180
+ #if ! defined (yyoverflow) || YYERROR_VERBOSE
181
+
182
+ # ifndef YYFREE
183
+ # define YYFREE free
184
+ # endif
185
+ # ifndef YYMALLOC
186
+ # define YYMALLOC malloc
187
+ # endif
188
+
189
+ /* The parser invokes alloca or malloc; define the necessary symbols. */
190
+
191
+ # ifdef YYSTACK_USE_ALLOCA
192
+ # if YYSTACK_USE_ALLOCA
193
+ # ifdef __GNUC__
194
+ # define YYSTACK_ALLOC __builtin_alloca
195
+ # else
196
+ # define YYSTACK_ALLOC alloca
197
+ # endif
198
+ # endif
199
+ # endif
200
+
201
+ # ifdef YYSTACK_ALLOC
202
+ /* Pacify GCC's `empty if-body' warning. */
203
+ # define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
204
+ # else
205
+ # if defined (__STDC__) || defined (__cplusplus)
206
+ # include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
207
+ # define YYSIZE_T size_t
208
+ # endif
209
+ # define YYSTACK_ALLOC YYMALLOC
210
+ # define YYSTACK_FREE YYFREE
211
+ # endif
212
+ #endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */
213
+
214
+
215
+ #if (! defined (yyoverflow) \
216
+ && (! defined (__cplusplus) \
217
+ || (defined (YYSTYPE_IS_TRIVIAL) && YYSTYPE_IS_TRIVIAL)))
218
+
219
+ /* A type that is properly aligned for any stack member. */
220
+ union yyalloc
221
+ {
222
+ short int yyss;
223
+ YYSTYPE yyvs;
224
+ };
225
+
226
+ /* The size of the maximum gap between one aligned stack and the next. */
227
+ # define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
228
+
229
+ /* The size of an array large enough to hold all stacks, each with
230
+ N elements. */
231
+ # define YYSTACK_BYTES(N) \
232
+ ((N) * (sizeof (short int) + sizeof (YYSTYPE)) \
233
+ + YYSTACK_GAP_MAXIMUM)
234
+
235
+ /* Copy COUNT objects from FROM to TO. The source and destination do
236
+ not overlap. */
237
+ # ifndef YYCOPY
238
+ # if defined (__GNUC__) && 1 < __GNUC__
239
+ # define YYCOPY(To, From, Count) \
240
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
241
+ # else
242
+ # define YYCOPY(To, From, Count) \
243
+ do \
244
+ { \
245
+ register YYSIZE_T yyi; \
246
+ for (yyi = 0; yyi < (Count); yyi++) \
247
+ (To)[yyi] = (From)[yyi]; \
248
+ } \
249
+ while (0)
250
+ # endif
251
+ # endif
252
+
253
+ /* Relocate STACK from its old location to the new one. The
254
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
255
+ elements in the stack, and YYPTR gives the new location of the
256
+ stack. Advance YYPTR to a properly aligned location for the next
257
+ stack. */
258
+ # define YYSTACK_RELOCATE(Stack) \
259
+ do \
260
+ { \
261
+ YYSIZE_T yynewbytes; \
262
+ YYCOPY (&yyptr->Stack, Stack, yysize); \
263
+ Stack = &yyptr->Stack; \
264
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
265
+ yyptr += yynewbytes / sizeof (*yyptr); \
266
+ } \
267
+ while (0)
268
+
269
+ #endif
270
+
271
+ #if defined (__STDC__) || defined (__cplusplus)
272
+ typedef signed char yysigned_char;
273
+ #else
274
+ typedef short int yysigned_char;
275
+ #endif
276
+
277
+ /* YYFINAL -- State number of the termination state. */
278
+ #define YYFINAL 38
279
+ /* YYLAST -- Last index in YYTABLE. */
280
+ #define YYLAST 98
281
+
282
+ /* YYNTOKENS -- Number of terminals. */
283
+ #define YYNTOKENS 26
284
+ /* YYNNTS -- Number of nonterminals. */
285
+ #define YYNNTS 16
286
+ /* YYNRULES -- Number of rules. */
287
+ #define YYNRULES 50
288
+ /* YYNSTATES -- Number of states. */
289
+ #define YYNSTATES 79
290
+
291
+ /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
292
+ #define YYUNDEFTOK 2
293
+ #define YYMAXUTOK 265
294
+
295
+ #define YYTRANSLATE(YYX) \
296
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
297
+
298
+ /* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
299
+ static const unsigned char yytranslate[] =
300
+ {
301
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
302
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
303
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
304
+ 2, 2, 2, 2, 18, 2, 2, 2, 2, 2,
305
+ 13, 14, 16, 2, 2, 2, 2, 2, 2, 2,
306
+ 2, 2, 2, 2, 2, 2, 2, 2, 10, 2,
307
+ 19, 25, 20, 2, 2, 2, 2, 2, 2, 2,
308
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
309
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
310
+ 2, 21, 2, 22, 12, 2, 2, 2, 2, 2,
311
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
312
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
313
+ 2, 2, 2, 24, 17, 23, 15, 2, 2, 2,
314
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
315
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
316
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
317
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
318
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
319
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
320
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
321
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
322
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
323
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
324
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
325
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
326
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
327
+ 5, 6, 7, 8, 9, 11
328
+ };
329
+
330
+ #if YYDEBUG
331
+ /* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
332
+ YYRHS. */
333
+ static const unsigned char yyprhs[] =
334
+ {
335
+ 0, 0, 3, 4, 6, 8, 12, 16, 19, 22,
336
+ 25, 27, 29, 33, 35, 39, 41, 43, 45, 47,
337
+ 49, 53, 56, 58, 59, 64, 65, 66, 72, 74,
338
+ 78, 82, 88, 91, 96, 98, 101, 104, 108, 112,
339
+ 117, 122, 127, 132, 136, 140, 144, 148, 151, 155,
340
+ 159
341
+ };
342
+
343
+ /* YYRHS -- A `-1'-separated list of the rules' RHS. */
344
+ static const yysigned_char yyrhs[] =
345
+ {
346
+ 27, 0, -1, -1, 28, -1, 29, -1, 28, 7,
347
+ 29, -1, 28, 6, 29, -1, 28, 29, -1, 9,
348
+ 30, -1, 8, 30, -1, 30, -1, 31, -1, 31,
349
+ 12, 3, -1, 32, -1, 13, 28, 14, -1, 34,
350
+ -1, 39, -1, 41, -1, 33, -1, 3, -1, 3,
351
+ 15, 3, -1, 3, 15, -1, 4, -1, -1, 38,
352
+ 10, 31, 35, -1, -1, -1, 16, 36, 10, 31,
353
+ 37, -1, 3, -1, 38, 17, 3, -1, 18, 40,
354
+ 18, -1, 18, 40, 18, 15, 3, -1, 18, 18,
355
+ -1, 18, 18, 15, 3, -1, 3, -1, 19, 20,
356
+ -1, 40, 3, -1, 40, 19, 20, -1, 40, 17,
357
+ 3, -1, 21, 3, 3, 22, -1, 21, 3, 3,
358
+ 23, -1, 24, 3, 3, 22, -1, 24, 3, 3,
359
+ 23, -1, 19, 3, 23, -1, 19, 3, 22, -1,
360
+ 21, 3, 20, -1, 24, 3, 20, -1, 19, 3,
361
+ -1, 19, 25, 3, -1, 20, 25, 3, -1, 20,
362
+ 3, -1
363
+ };
364
+
365
+ /* YYRLINE[YYN] -- source line where rule number YYN was defined. */
366
+ static const unsigned char yyrline[] =
367
+ {
368
+ 0, 86, 86, 87, 89, 90, 91, 92, 94, 95,
369
+ 96, 98, 99, 101, 102, 103, 104, 105, 106, 108,
370
+ 109, 110, 112, 114, 114, 116, 116, 116, 119, 120,
371
+ 122, 123, 124, 125, 127, 128, 129, 130, 131, 133,
372
+ 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
373
+ 144
374
+ };
375
+ #endif
376
+
377
+ #if YYDEBUG || YYERROR_VERBOSE
378
+ /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
379
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
380
+ static const char *const yytname[] =
381
+ {
382
+ "$end", "error", "$undefined", "WORD", "WILD_STR", "LOW", "OR", "AND",
383
+ "NOT", "REQ", "':'", "HIGH", "'^'", "'('", "')'", "'~'", "'*'", "'|'",
384
+ "'\"'", "'<'", "'>'", "'['", "']'", "'}'", "'{'", "'='", "$accept",
385
+ "bool_q", "bool_clss", "bool_cls", "boosted_q", "q", "term_q", "wild_q",
386
+ "field_q", "@1", "@2", "@3", "field", "phrase_q", "ph_words", "range_q", 0
387
+ };
388
+ #endif
389
+
390
+ # ifdef YYPRINT
391
+ /* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
392
+ token YYLEX-NUM. */
393
+ static const unsigned short int yytoknum[] =
394
+ {
395
+ 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
396
+ 58, 265, 94, 40, 41, 126, 42, 124, 34, 60,
397
+ 62, 91, 93, 125, 123, 61
398
+ };
399
+ # endif
400
+
401
+ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
402
+ static const unsigned char yyr1[] =
403
+ {
404
+ 0, 26, 27, 27, 28, 28, 28, 28, 29, 29,
405
+ 29, 30, 30, 31, 31, 31, 31, 31, 31, 32,
406
+ 32, 32, 33, 35, 34, 36, 37, 34, 38, 38,
407
+ 39, 39, 39, 39, 40, 40, 40, 40, 40, 41,
408
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
409
+ 41
410
+ };
411
+
412
+ /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
413
+ static const unsigned char yyr2[] =
414
+ {
415
+ 0, 2, 0, 1, 1, 3, 3, 2, 2, 2,
416
+ 1, 1, 3, 1, 3, 1, 1, 1, 1, 1,
417
+ 3, 2, 1, 0, 4, 0, 0, 5, 1, 3,
418
+ 3, 5, 2, 4, 1, 2, 2, 3, 3, 4,
419
+ 4, 4, 4, 3, 3, 3, 3, 2, 3, 3,
420
+ 2
421
+ };
422
+
423
+ /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
424
+ STATE-NUM when YYTABLE doesn't specify something else to do. Zero
425
+ means the default is an error. */
426
+ static const unsigned char yydefact[] =
427
+ {
428
+ 2, 19, 22, 0, 0, 0, 25, 0, 0, 0,
429
+ 0, 0, 0, 3, 4, 10, 11, 13, 18, 15,
430
+ 0, 16, 17, 21, 9, 8, 0, 0, 34, 32,
431
+ 0, 0, 47, 0, 50, 0, 0, 0, 1, 0,
432
+ 0, 7, 0, 0, 0, 20, 14, 0, 0, 35,
433
+ 36, 0, 30, 0, 44, 43, 48, 49, 0, 45,
434
+ 0, 46, 6, 5, 12, 23, 29, 26, 33, 38,
435
+ 0, 37, 39, 40, 41, 42, 24, 27, 31
436
+ };
437
+
438
+ /* YYDEFGOTO[NTERM-NUM]. */
439
+ static const yysigned_char yydefgoto[] =
440
+ {
441
+ -1, 12, 13, 14, 15, 16, 17, 18, 19, 76,
442
+ 27, 77, 20, 21, 31, 22
443
+ };
444
+
445
+ /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
446
+ STATE-NUM. */
447
+ #define YYPACT_NINF -32
448
+ static const yysigned_char yypact[] =
449
+ {
450
+ 44, 75, -32, 63, 63, 44, -32, 55, -2, -1,
451
+ 0, 3, 11, 25, -32, -32, 18, -32, -32, -32,
452
+ 76, -32, -32, 33, -32, -32, 1, 32, -32, 41,
453
+ 39, 52, 17, 58, -32, 69, 15, 34, -32, 44,
454
+ 44, -32, 72, 63, 77, -32, -32, 63, 88, -32,
455
+ -32, 91, 80, 78, -32, -32, -32, -32, 28, -32,
456
+ 66, -32, -32, -32, -32, -32, -32, -32, -32, -32,
457
+ 93, -32, -32, -32, -32, -32, -32, -32, -32
458
+ };
459
+
460
+ /* YYPGOTO[NTERM-NUM]. */
461
+ static const yysigned_char yypgoto[] =
462
+ {
463
+ -32, -32, 92, -13, 74, -31, -32, -32, -32, -32,
464
+ -32, -32, -32, -32, -32, -32
465
+ };
466
+
467
+ /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
468
+ positive, shift that token. If negative, reduce the rule which
469
+ number is the opposite. If zero, do what YYDEFACT says.
470
+ If YYTABLE_NINF, syntax error. */
471
+ #define YYTABLE_NINF -29
472
+ static const yysigned_char yytable[] =
473
+ {
474
+ 41, 32, 34, 36, 1, 2, 37, 39, 40, 3,
475
+ 4, 38, 65, 41, 5, 46, 67, 6, 58, 7,
476
+ 8, 9, 10, 33, 35, 11, 62, 63, 1, 2,
477
+ 42, 39, 40, 3, 4, 59, 45, 60, 5, 54,
478
+ 55, 6, 47, 7, 8, 9, 10, 1, 2, 11,
479
+ 72, 73, 3, 4, 61, 50, 48, 5, 28, 49,
480
+ 6, 56, 7, 8, 9, 10, 1, 2, 11, 51,
481
+ 52, 53, 57, 29, 30, 64, 5, 24, 25, 6,
482
+ 66, 7, 8, 9, 10, -28, 43, 11, 74, 75,
483
+ 23, 68, -28, 44, 69, 70, 78, 26, 71
484
+ };
485
+
486
/* YYCHECK -- validity guard read alongside YYTABLE by yyparse.  The entry
   yytable[i] is only used when yycheck[i] equals the symbol that produced
   index i (the look-ahead token for actions, the exposed state for gotos,
   YYTERROR during error recovery); otherwise yyparse falls back to
   yydefact / yydefgoto.  */
+ static const unsigned char yycheck[] =
487
+ {
488
+ 13, 3, 3, 3, 3, 4, 3, 6, 7, 8,
489
+ 9, 0, 43, 26, 13, 14, 47, 16, 3, 18,
490
+ 19, 20, 21, 33, 35, 24, 39, 40, 3, 4,
491
+ 12, 6, 7, 8, 9, 20, 3, 3, 13, 22,
492
+ 23, 16, 10, 18, 19, 20, 21, 3, 4, 24,
493
+ 22, 23, 8, 9, 20, 3, 15, 13, 3, 20,
494
+ 16, 3, 18, 19, 20, 21, 3, 4, 24, 17,
495
+ 18, 19, 3, 18, 19, 3, 13, 3, 4, 16,
496
+ 3, 18, 19, 20, 21, 10, 10, 24, 22, 23,
497
+ 15, 3, 17, 17, 3, 15, 3, 5, 20
498
+ };
499
+
500
+ /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
501
+ symbol of state STATE-NUM. */
502
+ static const unsigned char yystos[] =
503
+ {
504
+ 0, 3, 4, 8, 9, 13, 16, 18, 19, 20,
505
+ 21, 24, 27, 28, 29, 30, 31, 32, 33, 34,
506
+ 38, 39, 41, 15, 30, 30, 28, 36, 3, 18,
507
+ 19, 40, 3, 25, 3, 25, 3, 3, 0, 6,
508
+ 7, 29, 12, 10, 17, 3, 14, 10, 15, 20,
509
+ 3, 17, 18, 19, 22, 23, 3, 3, 3, 20,
510
+ 3, 20, 29, 29, 3, 31, 3, 31, 3, 3,
511
+ 15, 20, 22, 23, 22, 23, 35, 37, 3
512
+ };
513
+
514
+ #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
515
+ # define YYSIZE_T __SIZE_TYPE__
516
+ #endif
517
+ #if ! defined (YYSIZE_T) && defined (size_t)
518
+ # define YYSIZE_T size_t
519
+ #endif
520
+ #if ! defined (YYSIZE_T)
521
+ # if defined (__STDC__) || defined (__cplusplus)
522
+ # include <stddef.h> /* INFRINGES ON USER NAME SPACE */
523
+ # define YYSIZE_T size_t
524
+ # endif
525
+ #endif
526
+ #if ! defined (YYSIZE_T)
527
+ # define YYSIZE_T unsigned int
528
+ #endif
529
+
530
+ #define yyerrok (yyerrstatus = 0)
531
+ #define yyclearin (yychar = YYEMPTY)
532
+ #define YYEMPTY (-2)
533
+ #define YYEOF 0
534
+
535
+ #define YYACCEPT goto yyacceptlab
536
+ #define YYABORT goto yyabortlab
537
+ #define YYERROR goto yyerrorlab
538
+
539
+
540
+ /* Like YYERROR except do call yyerror. This remains here temporarily
541
+ to ease the transition to the new meaning of YYERROR, for GCC.
542
+ Once GCC version 2 has supplanted version 1, this can go. */
543
+
544
+ #define YYFAIL goto yyerrlab
545
+
546
+ #define YYRECOVERING() (!!yyerrstatus)
547
+
548
+ #define YYBACKUP(Token, Value) \
549
+ do \
550
+ if (yychar == YYEMPTY && yylen == 1) \
551
+ { \
552
+ yychar = (Token); \
553
+ yylval = (Value); \
554
+ yytoken = YYTRANSLATE (yychar); \
555
+ YYPOPSTACK; \
556
+ goto yybackup; \
557
+ } \
558
+ else \
559
+ { \
560
+ yyerror (qp, "syntax error: cannot back up");\
561
+ YYERROR; \
562
+ } \
563
+ while (0)
564
+
565
+
566
+ #define YYTERROR 1
567
+ #define YYERRCODE 256
568
+
569
+
570
+ /* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
571
+ If N is 0, then set CURRENT to the empty location which ends
572
+ the previous symbol: RHS[0] (always defined). */
573
+
574
+ #define YYRHSLOC(Rhs, K) ((Rhs)[K])
575
+ #ifndef YYLLOC_DEFAULT
576
+ # define YYLLOC_DEFAULT(Current, Rhs, N) \
577
+ do \
578
+ if (N) \
579
+ { \
580
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
581
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
582
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
583
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
584
+ } \
585
+ else \
586
+ { \
587
+ (Current).first_line = (Current).last_line = \
588
+ YYRHSLOC (Rhs, 0).last_line; \
589
+ (Current).first_column = (Current).last_column = \
590
+ YYRHSLOC (Rhs, 0).last_column; \
591
+ } \
592
+ while (0)
593
+ #endif
594
+
595
+
596
+ /* YY_LOCATION_PRINT -- Print the location on the stream.
597
+ This macro was not mandated originally: define only if we know
598
+ we won't break user code: when these are the locations we know. */
599
+
600
+ #ifndef YY_LOCATION_PRINT
601
+ # if YYLTYPE_IS_TRIVIAL
602
+ # define YY_LOCATION_PRINT(File, Loc) \
603
+ fprintf (File, "%d.%d-%d.%d", \
604
+ (Loc).first_line, (Loc).first_column, \
605
+ (Loc).last_line, (Loc).last_column)
606
+ # else
607
+ # define YY_LOCATION_PRINT(File, Loc) ((void) 0)
608
+ # endif
609
+ #endif
610
+
611
+
612
+ /* YYLEX -- calling `yylex' with the right arguments. */
613
+
614
+ #ifdef YYLEX_PARAM
615
+ # define YYLEX yylex (&yylval, YYLEX_PARAM)
616
+ #else
617
+ # define YYLEX yylex (&yylval, qp)
618
+ #endif
619
+
620
+ /* Enable debugging if requested. */
621
+ #if YYDEBUG
622
+
623
+ # ifndef YYFPRINTF
624
+ # include <stdio.h> /* INFRINGES ON USER NAME SPACE */
625
+ # define YYFPRINTF fprintf
626
+ # endif
627
+
628
+ # define YYDPRINTF(Args) \
629
+ do { \
630
+ if (yydebug) \
631
+ YYFPRINTF Args; \
632
+ } while (0)
633
+
634
+ # define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
635
+ do { \
636
+ if (yydebug) \
637
+ { \
638
+ YYFPRINTF (stderr, "%s ", Title); \
639
+ yysymprint (stderr, \
640
+ Type, Value); \
641
+ YYFPRINTF (stderr, "\n"); \
642
+ } \
643
+ } while (0)
644
+
645
+ /*------------------------------------------------------------------.
646
+ | yy_stack_print -- Print the state stack from its BOTTOM up to its |
647
+ | TOP (included). |
648
+ | Debug-only helper (this code sits inside #if YYDEBUG). Writes |
+ | "Stack now", then every short int from BOTTOM through TOP as |
+ | " %d", then a newline, all to stderr through YYFPRINTF. |
+ | BOTTOM is advanced in place; nothing is returned. |
+ `------------------------------------------------------------------*/
649
+
650
+ #if defined (__STDC__) || defined (__cplusplus)
651
+ static void
652
+ yy_stack_print (short int *bottom, short int *top)
653
+ #else
654
+ static void
655
+ yy_stack_print (bottom, top)
656
+ short int *bottom;
657
+ short int *top;
658
+ #endif
659
+ {
660
+ YYFPRINTF (stderr, "Stack now");
661
+ for (/* Nothing. */; bottom <= top; ++bottom)
662
+ YYFPRINTF (stderr, " %d", *bottom);
663
+ YYFPRINTF (stderr, "\n");
664
+ }
665
+
666
+ # define YY_STACK_PRINT(Bottom, Top) \
667
+ do { \
668
+ if (yydebug) \
669
+ yy_stack_print ((Bottom), (Top)); \
670
+ } while (0)
671
+
672
+
673
+ /*------------------------------------------------.
673
+ | Report that the YYRULE is going to be reduced. |
674
+ | Debug-only helper (inside #if YYDEBUG). Prints the rule number |
+ | (as YYRULE - 1) and its grammar line from yyrline, then the name |
+ | of every right-hand-side symbol read from yyrhs starting at |
+ | yyprhs[YYRULE] until a negative entry, then "-> " and the name of |
+ | the symbol the rule derives (yyr1[YYRULE]). Output goes to stderr. |
+ `------------------------------------------------*/
675
+
676
+ #if defined (__STDC__) || defined (__cplusplus)
677
+ static void
678
+ yy_reduce_print (int yyrule)
679
+ #else
680
+ static void
681
+ yy_reduce_print (yyrule)
682
+ int yyrule;
683
+ #endif
684
+ {
685
+ int yyi;
686
+ unsigned int yylno = yyrline[yyrule];
687
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ",
688
+ yyrule - 1, yylno);
689
+ /* Print the symbols being reduced, and their result. */
690
+ for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++)
691
+ YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]);
692
+ YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]);
693
+ }
695
+
696
+ # define YY_REDUCE_PRINT(Rule) \
697
+ do { \
698
+ if (yydebug) \
699
+ yy_reduce_print (Rule); \
700
+ } while (0)
701
+
702
+ /* Nonzero means print parse trace. It is left uninitialized so that
703
+ multiple parsers can coexist. */
704
+ int yydebug;
705
+ #else /* !YYDEBUG */
706
+ # define YYDPRINTF(Args)
707
+ # define YY_SYMBOL_PRINT(Title, Type, Value, Location)
708
+ # define YY_STACK_PRINT(Bottom, Top)
709
+ # define YY_REDUCE_PRINT(Rule)
710
+ #endif /* !YYDEBUG */
711
+
712
+
713
+ /* YYINITDEPTH -- initial size of the parser's stacks. */
714
+ #ifndef YYINITDEPTH
715
+ # define YYINITDEPTH 200
716
+ #endif
717
+
718
+ /* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
719
+ if the built-in stack extension method is used).
720
+
721
+ Do not make this value too large; the results are undefined if
722
+ SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
723
+ evaluated with infinite-precision integer arithmetic. */
724
+
725
+ #ifndef YYMAXDEPTH
726
+ # define YYMAXDEPTH 10000
727
+ #endif
728
+
729
+
730
+
731
+ #if YYERROR_VERBOSE
732
+
733
+ # ifndef yystrlen
734
+ # if defined (__GLIBC__) && defined (_STRING_H)
735
+ # define yystrlen strlen
736
+ # else
737
+ /* Return the length of YYSTR, not counting the terminating '\0'.
+ Fallback definition: it is compiled only under YYERROR_VERBOSE, when
+ yystrlen is not already defined and strlen was not selected above
+ (__GLIBC__ together with _STRING_H). YYSTR must be NUL-terminated. */
737
+ static YYSIZE_T
738
+ # if defined (__STDC__) || defined (__cplusplus)
739
+ yystrlen (const char *yystr)
740
+ # else
741
+ yystrlen (yystr)
742
+ const char *yystr;
743
+ # endif
744
+ {
745
+ register const char *yys = yystr;
746
+
747
+ while (*yys++ != '\0')
748
+ continue;
749
+
750
+ return yys - yystr - 1; /* yys stopped one past the '\0', hence the - 1 */
751
+ }
753
+ # endif
754
+ # endif
755
+
756
+ # ifndef yystpcpy
757
+ # if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
758
+ # define yystpcpy stpcpy
759
+ # else
760
+ /* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
760
+ YYDEST.
+ Fallback definition: it is compiled only under YYERROR_VERBOSE, when
+ yystpcpy is not already defined and stpcpy was not selected above.
+ No bound is checked: YYDEST must have room for YYSRC including its
+ terminating '\0'. */
761
+ static char *
762
+ # if defined (__STDC__) || defined (__cplusplus)
763
+ yystpcpy (char *yydest, const char *yysrc)
764
+ # else
765
+ yystpcpy (yydest, yysrc)
766
+ char *yydest;
767
+ const char *yysrc;
768
+ # endif
769
+ {
770
+ register char *yyd = yydest;
771
+ register const char *yys = yysrc;
772
+
773
+ while ((*yyd++ = *yys++) != '\0')
774
+ continue;
775
+
776
+ return yyd - 1; /* yyd stopped one past the copied '\0' */
777
+ }
779
+ # endif
780
+ # endif
781
+
782
+ #endif /* !YYERROR_VERBOSE */
783
+
784
+
785
+
786
+ #if YYDEBUG
787
+ /*--------------------------------.
787
+ | Print this symbol on YYOUTPUT. |
788
+ | Debug-only helper (inside #if YYDEBUG). Writes "token NAME (" when |
+ | YYTYPE < YYNTOKENS and "nterm NAME (" otherwise, with NAME taken |
+ | from yytname, then ")". If YYPRINT is defined it is also invoked |
+ | for tokens with the value behind YYVALUEP. The switch has only a |
+ | default case, so no symbol gets a type-specific printer. |
+ `--------------------------------*/
789
+
790
+ #if defined (__STDC__) || defined (__cplusplus)
791
+ static void
792
+ yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep)
793
+ #else
794
+ static void
795
+ yysymprint (yyoutput, yytype, yyvaluep)
796
+ FILE *yyoutput;
797
+ int yytype;
798
+ YYSTYPE *yyvaluep;
799
+ #endif
800
+ {
801
+ /* Pacify ``unused variable'' warnings. */
802
+ (void) yyvaluep;
803
+
804
+ if (yytype < YYNTOKENS)
805
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
806
+ else
807
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
808
+
809
+
810
+ # ifdef YYPRINT
811
+ if (yytype < YYNTOKENS)
812
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
813
+ # endif
814
+ switch (yytype)
815
+ {
816
+ default:
817
+ break;
818
+ }
819
+ YYFPRINTF (yyoutput, ")");
820
+ }
822
+
823
+ #endif /* ! YYDEBUG */
824
+ /*-----------------------------------------------.
825
+ | Release the memory associated to this symbol. |
826
+ | As generated here the switch has only a default case, so nothing |
+ | is actually freed for any symbol type; the function only traces |
+ | the symbol through YY_SYMBOL_PRINT, using YYMSG or "Deleting" when |
+ | YYMSG is NULL. |
+ | NOTE(review): yylocationp is not declared in this function; that |
+ | is harmless only because neither definition of YY_SYMBOL_PRINT |
+ | expands its Location argument. |
+ `-----------------------------------------------*/
827
+
828
+ #if defined (__STDC__) || defined (__cplusplus)
829
+ static void
830
+ yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
831
+ #else
832
+ static void
833
+ yydestruct (yymsg, yytype, yyvaluep)
834
+ const char *yymsg;
835
+ int yytype;
836
+ YYSTYPE *yyvaluep;
837
+ #endif
838
+ {
839
+ /* Pacify ``unused variable'' warnings. */
840
+ (void) yyvaluep;
841
+
842
+ if (!yymsg)
843
+ yymsg = "Deleting";
844
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
845
+
846
+ switch (yytype)
847
+ {
848
+
849
+ default:
850
+ break;
851
+ }
852
+ }
853
+
854
+
855
+ /* Prevent warnings from -Wmissing-prototypes. */
856
+
857
+ #ifdef YYPARSE_PARAM
858
+ # if defined (__STDC__) || defined (__cplusplus)
859
+ int yyparse (void *YYPARSE_PARAM);
860
+ # else
861
+ int yyparse ();
862
+ # endif
863
+ #else /* ! YYPARSE_PARAM */
864
+ #if defined (__STDC__) || defined (__cplusplus)
865
+ int yyparse (QParser *qp);
866
+ #else
867
+ int yyparse ();
868
+ #endif
869
+ #endif /* ! YYPARSE_PARAM */
870
+
871
+
872
+
873
+
874
+
875
+
876
+ /*----------.
876
+ | yyparse. |
877
+ | Table-driven shift/reduce parser for the query grammar; the user
+ | actions in the switch below come from src/query_parser/q_parser.y
+ | (see the #line directives). In the default (non-YYPARSE_PARAM)
+ | build QP is handed to yylex (through YYLEX), to yyerror and to the
+ | actions, which store the parsed query in qp->result (rules 2, 3).
+ | Returns 0 via yyacceptlab, 1 via yyabortlab, and 2 via
+ | yyoverflowlab when the stacks cannot be grown any further.
+ `----------*/
878
+
879
+ #ifdef YYPARSE_PARAM
880
+ # if defined (__STDC__) || defined (__cplusplus)
881
+ int yyparse (void *YYPARSE_PARAM)
882
+ # else
883
+ int yyparse (YYPARSE_PARAM)
884
+ void *YYPARSE_PARAM;
885
+ # endif
886
+ #else /* ! YYPARSE_PARAM */
887
+ #if defined (__STDC__) || defined (__cplusplus)
888
+ int
889
+ yyparse (QParser *qp)
890
+ #else
891
+ int
892
+ yyparse (qp)
893
+ QParser *qp;
894
+ #endif
895
+ #endif
896
+ {
897
+ /* The look-ahead symbol. */
898
+ int yychar;
899
+
900
+ /* The semantic value of the look-ahead symbol. */
901
+ YYSTYPE yylval;
902
+
903
+ /* Number of syntax errors so far. */
904
+ int yynerrs;
905
+
906
+ register int yystate;
907
+ register int yyn;
908
+ int yyresult;
909
+ /* Number of tokens to shift before error messages enabled. */
910
+ int yyerrstatus;
911
+ /* Look-ahead token as an internal (translated) token number. */
912
+ int yytoken = 0;
913
+
914
+ /* Three stacks and their tools:
915
+ `yyss': related to states,
916
+ `yyvs': related to semantic values,
917
+ `yyls': related to locations.
918
+
919
+ Refer to the stacks thru separate pointers, to allow yyoverflow
920
+ to reallocate them elsewhere. */
921
+
922
+ /* The state stack. */
923
+ short int yyssa[YYINITDEPTH];
924
+ short int *yyss = yyssa;
925
+ register short int *yyssp;
926
+
927
+ /* The semantic value stack. */
928
+ YYSTYPE yyvsa[YYINITDEPTH];
929
+ YYSTYPE *yyvs = yyvsa;
930
+ register YYSTYPE *yyvsp;
931
+
932
+
933
+
934
+ #define YYPOPSTACK (yyvsp--, yyssp--)
935
+
936
+ YYSIZE_T yystacksize = YYINITDEPTH;
937
+
938
+ /* The variables used to return semantic value and location from the
939
+ action routines. */
940
+ YYSTYPE yyval;
941
+
942
+
943
+ /* When reducing, the number of symbols on the RHS of the reduced
944
+ rule. */
945
+ int yylen;
946
+
947
+ YYDPRINTF ((stderr, "Starting parse\n"));
948
+
949
+ yystate = 0;
950
+ yyerrstatus = 0;
951
+ yynerrs = 0;
952
+ yychar = YYEMPTY; /* Cause a token to be read. */
953
+
954
+ /* Initialize stack pointers.
955
+ Waste one element of value and location stack
956
+ so that they stay on the same level as the state stack.
957
+ The wasted elements are never initialized. */
958
+
959
+ yyssp = yyss;
960
+ yyvsp = yyvs;
961
+
962
+
963
+ yyvsp[0] = yylval; /* NOTE(review): yylval has not been assigned yet; this only fills the "wasted" bottom slot described above. */
964
+
965
+ goto yysetstate;
966
+
967
+ /*------------------------------------------------------------.
968
+ | yynewstate -- Push a new state, which is found in yystate. |
969
+ `------------------------------------------------------------*/
970
+ yynewstate:
971
+ /* In all cases, when you get here, the value and location stacks
972
+ have just been pushed. so pushing a state here evens the stacks.
973
+ */
974
+ yyssp++;
975
+
976
+ yysetstate:
977
+ *yyssp = yystate;
978
+
979
+ if (yyss + yystacksize - 1 <= yyssp)
980
+ {
981
+ /* Get the current used size of the three stacks, in elements. */
982
+ YYSIZE_T yysize = yyssp - yyss + 1;
983
+
984
+ #ifdef yyoverflow
985
+ {
986
+ /* Give user a chance to reallocate the stack. Use copies of
987
+ these so that the &'s don't force the real ones into
988
+ memory. */
989
+ YYSTYPE *yyvs1 = yyvs;
990
+ short int *yyss1 = yyss;
991
+
992
+
993
+ /* Each stack pointer address is followed by the size of the
994
+ data in use in that stack, in bytes. This used to be a
995
+ conditional around just the two extra args, but that might
996
+ be undefined if yyoverflow is a macro. */
997
+ yyoverflow ("parser stack overflow",
998
+ &yyss1, yysize * sizeof (*yyssp),
999
+ &yyvs1, yysize * sizeof (*yyvsp),
1000
+
1001
+ &yystacksize);
1002
+
1003
+ yyss = yyss1;
1004
+ yyvs = yyvs1;
1005
+ }
1006
+ #else /* no yyoverflow */
1007
+ # ifndef YYSTACK_RELOCATE
1008
+ goto yyoverflowlab;
1009
+ # else
1010
+ /* Extend the stack our own way. */
1011
+ if (YYMAXDEPTH <= yystacksize)
1012
+ goto yyoverflowlab;
1013
+ yystacksize *= 2;
1014
+ if (YYMAXDEPTH < yystacksize)
1015
+ yystacksize = YYMAXDEPTH;
1016
+
1017
+ {
1018
+ short int *yyss1 = yyss;
1019
+ union yyalloc *yyptr =
1020
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
1021
+ if (! yyptr)
1022
+ goto yyoverflowlab;
1023
+ YYSTACK_RELOCATE (yyss);
1024
+ YYSTACK_RELOCATE (yyvs);
1025
+
1026
+ # undef YYSTACK_RELOCATE
1027
+ if (yyss1 != yyssa)
1028
+ YYSTACK_FREE (yyss1);
1029
+ }
1030
+ # endif
1031
+ #endif /* no yyoverflow */
1032
+
1033
+ yyssp = yyss + yysize - 1;
1034
+ yyvsp = yyvs + yysize - 1;
1035
+
1036
+
1037
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
1038
+ (unsigned long int) yystacksize));
1039
+
1040
+ if (yyss + yystacksize - 1 <= yyssp)
1041
+ YYABORT;
1042
+ }
1043
+
1044
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
1045
+
1046
+ goto yybackup;
1047
+
1048
+ /*-----------.
1049
+ | yybackup. |
1050
+ `-----------*/
1051
+ yybackup:
1052
+
1053
+ /* Do appropriate processing given the current state. */
1054
+ /* Read a look-ahead token if we need one and don't already have one. */
1055
+ /* yyresume: */
1056
+
1057
+ /* First try to decide what to do without reference to look-ahead token. */
1058
+
1059
+ yyn = yypact[yystate];
1060
+ if (yyn == YYPACT_NINF)
1061
+ goto yydefault;
1062
+
1063
+ /* Not known => get a look-ahead token if don't already have one. */
1064
+
1065
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
1066
+ if (yychar == YYEMPTY)
1067
+ {
1068
+ YYDPRINTF ((stderr, "Reading a token: "));
1069
+ yychar = YYLEX;
1070
+ }
1071
+
1072
+ if (yychar <= YYEOF)
1073
+ {
1074
+ yychar = yytoken = YYEOF;
1075
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
1076
+ }
1077
+ else
1078
+ {
1079
+ yytoken = YYTRANSLATE (yychar);
1080
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
1081
+ }
1082
+
1083
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
1084
+ detect an error, take that action. */
1085
+ yyn += yytoken;
1086
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
1087
+ goto yydefault;
1088
+ yyn = yytable[yyn];
1089
+ if (yyn <= 0)
1090
+ {
1091
+ if (yyn == 0 || yyn == YYTABLE_NINF)
1092
+ goto yyerrlab;
1093
+ yyn = -yyn;
1094
+ goto yyreduce;
1095
+ }
1096
+
1097
+ if (yyn == YYFINAL)
1098
+ YYACCEPT;
1099
+
1100
+ /* Shift the look-ahead token. */
1101
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
1102
+
1103
+ /* Discard the token being shifted unless it is eof. */
1104
+ if (yychar != YYEOF)
1105
+ yychar = YYEMPTY;
1106
+
1107
+ *++yyvsp = yylval;
1108
+
1109
+
1110
+ /* Count tokens shifted since error; after three, turn off error
1111
+ status. */
1112
+ if (yyerrstatus)
1113
+ yyerrstatus--;
1114
+
1115
+ yystate = yyn;
1116
+ goto yynewstate;
1117
+
1118
+
1119
+ /*-----------------------------------------------------------.
1120
+ | yydefault -- do the default action for the current state. |
1121
+ `-----------------------------------------------------------*/
1122
+ yydefault:
1123
+ yyn = yydefact[yystate];
1124
+ if (yyn == 0)
1125
+ goto yyerrlab;
1126
+ goto yyreduce;
1127
+
1128
+
1129
+ /*-----------------------------.
1130
+ | yyreduce -- Do a reduction. |
1131
+ `-----------------------------*/
1132
+ yyreduce:
1133
+ /* yyn is the number of a rule to reduce with. */
1134
+ yylen = yyr2[yyn];
1135
+
1136
+ /* If YYLEN is nonzero, implement the default value of the action:
1137
+ `$$ = $1'.
1138
+
1139
+ Otherwise, the following line sets YYVAL to garbage.
1140
+ This behavior is undocumented and Bison
1141
+ users should not rely upon it. Assigning to YYVAL
1142
+ unconditionally makes the parser a bit smaller, and it avoids a
1143
+ GCC warning that YYVAL may be used uninitialized. */
1144
+ yyval = yyvsp[1-yylen];
1145
+
1146
+
1147
+ YY_REDUCE_PRINT (yyn);
1148
+ switch (yyn)
1149
+ {
1150
+ case 2:
1151
+ #line 86 "src/query_parser/q_parser.y"
1152
+ { qp->result = (yyval.query) = NULL; }
1153
+ break;
1154
+
1155
+ case 3:
1156
+ #line 87 "src/query_parser/q_parser.y"
1157
+ { qp->result = (yyval.query) = get_bool_q((yyvsp[0].array)); }
1158
+ break;
1159
+
1160
+ case 4:
1161
+ #line 89 "src/query_parser/q_parser.y"
1162
+ { (yyval.array) = first_cls((yyvsp[0].bcls)); }
1163
+ break;
1164
+
1165
+ case 5:
1166
+ #line 90 "src/query_parser/q_parser.y"
1167
+ { (yyval.array) = add_and_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1168
+ break;
1169
+
1170
+ case 6:
1171
+ #line 91 "src/query_parser/q_parser.y"
1172
+ { (yyval.array) = add_or_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1173
+ break;
1174
+
1175
+ case 7:
1176
+ #line 92 "src/query_parser/q_parser.y"
1177
+ { (yyval.array) = add_default_cls(qp, (yyvsp[-1].array), (yyvsp[0].bcls)); }
1178
+ break;
1179
+
1180
+ case 8:
1181
+ #line 94 "src/query_parser/q_parser.y"
1182
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
1183
+ break;
1184
+
1185
+ case 9:
1186
+ #line 95 "src/query_parser/q_parser.y"
1187
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
1188
+ break;
1189
+
1190
+ case 10:
1191
+ #line 96 "src/query_parser/q_parser.y"
1192
+ { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
1193
+ break;
1194
+
1195
+ case 12: /* NOTE(review): sscanf's result is not checked, so a string that does not parse as a float leaves boost as it was. */
1196
+ #line 99 "src/query_parser/q_parser.y"
1197
+ { if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
1198
+ break;
1199
+
1200
+ case 14:
1201
+ #line 102 "src/query_parser/q_parser.y"
1202
+ { (yyval.query) = get_bool_q((yyvsp[-1].array)); }
1203
+ break;
1204
+
1205
+ case 19:
1206
+ #line 108 "src/query_parser/q_parser.y"
1207
+ { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
1208
+ break;
1209
+
1210
+ case 20:
1211
+ #line 109 "src/query_parser/q_parser.y"
1212
+ { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
1213
+ break;
1214
+
1215
+ case 21:
1216
+ #line 110 "src/query_parser/q_parser.y"
1217
+ { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
1218
+ break;
1219
+
1220
+ case 22:
1221
+ #line 112 "src/query_parser/q_parser.y"
1222
+ { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
1223
+ break;
1224
+
1225
+ case 23: /* empty rule (yyr2[23] is 0), reduced only for its side effect on qp->fields */
1226
+ #line 114 "src/query_parser/q_parser.y"
1227
+ { qp->fields = qp->def_fields; }
1228
+ break;
1229
+
1230
+ case 24:
1231
+ #line 115 "src/query_parser/q_parser.y"
1232
+ { (yyval.query) = (yyvsp[-1].query); }
1233
+ break;
1234
+
1235
+ case 25: /* empty rule (yyr2[25] is 0), reduced only for its side effect on qp->fields */
1236
+ #line 116 "src/query_parser/q_parser.y"
1237
+ { qp->fields = qp->all_fields; }
1238
+ break;
1239
+
1240
+ case 26: /* empty rule (yyr2[26] is 0), reduced only for its side effect on qp->fields */
1241
+ #line 116 "src/query_parser/q_parser.y"
1242
+ {qp->fields = qp->def_fields;}
1243
+ break;
1244
+
1245
+ case 27:
1246
+ #line 117 "src/query_parser/q_parser.y"
1247
+ { (yyval.query) = (yyvsp[-1].query); }
1248
+ break;
1249
+
1250
+ case 28:
1251
+ #line 119 "src/query_parser/q_parser.y"
1252
+ { (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
1253
+ break;
1254
+
1255
+ case 29:
1256
+ #line 120 "src/query_parser/q_parser.y"
1257
+ { (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
1258
+ break;
1259
+
1260
+ case 30:
1261
+ #line 122 "src/query_parser/q_parser.y"
1262
+ { (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
1263
+ break;
1264
+
1265
+ case 31:
1266
+ #line 123 "src/query_parser/q_parser.y"
1267
+ { (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
1268
+ break;
1269
+
1270
+ case 32:
1271
+ #line 124 "src/query_parser/q_parser.y"
1272
+ { (yyval.query) = NULL; }
1273
+ break;
1274
+
1275
+ case 33:
1276
+ #line 125 "src/query_parser/q_parser.y"
1277
+ { (yyval.query) = NULL; }
1278
+ break;
1279
+
1280
+ case 34:
1281
+ #line 127 "src/query_parser/q_parser.y"
1282
+ { (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
1283
+ break;
1284
+
1285
+ case 35:
1286
+ #line 128 "src/query_parser/q_parser.y"
1287
+ { (yyval.phrase) = ph_first_word(NULL); }
1288
+ break;
1289
+
1290
+ case 36:
1291
+ #line 129 "src/query_parser/q_parser.y"
1292
+ { (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
1293
+ break;
1294
+
1295
+ case 37:
1296
+ #line 130 "src/query_parser/q_parser.y"
1297
+ { (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
1298
+ break;
1299
+
1300
+ case 38:
1301
+ #line 131 "src/query_parser/q_parser.y"
1302
+ { (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
1303
+ break;
1304
+
1305
+ case 39:
1306
+ #line 133 "src/query_parser/q_parser.y"
1307
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
1308
+ break;
1309
+
1310
+ case 40:
1311
+ #line 134 "src/query_parser/q_parser.y"
1312
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
1313
+ break;
1314
+
1315
+ case 41:
1316
+ #line 135 "src/query_parser/q_parser.y"
1317
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
1318
+ break;
1319
+
1320
+ case 42:
1321
+ #line 136 "src/query_parser/q_parser.y"
1322
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
1323
+ break;
1324
+
1325
+ case 43:
1326
+ #line 137 "src/query_parser/q_parser.y"
1327
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
1328
+ break;
1329
+
1330
+ case 44:
1331
+ #line 138 "src/query_parser/q_parser.y"
1332
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
1333
+ break;
1334
+
1335
+ case 45:
1336
+ #line 139 "src/query_parser/q_parser.y"
1337
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
1338
+ break;
1339
+
1340
+ case 46:
1341
+ #line 140 "src/query_parser/q_parser.y"
1342
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
1343
+ break;
1344
+
1345
+ case 47:
1346
+ #line 141 "src/query_parser/q_parser.y"
1347
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
1348
+ break;
1349
+
1350
+ case 48:
1351
+ #line 142 "src/query_parser/q_parser.y"
1352
+ { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
1353
+ break;
1354
+
1355
+ case 49:
1356
+ #line 143 "src/query_parser/q_parser.y"
1357
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
1358
+ break;
1359
+
1360
+ case 50:
1361
+ #line 144 "src/query_parser/q_parser.y"
1362
+ { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
1363
+ break;
1364
+
1365
+
1366
+ }
1367
+
1368
+ /* Line 1037 of yacc.c. */
1369
+ #line 1371 "y.tab.c"
1370
+
1371
+ yyvsp -= yylen;
1372
+ yyssp -= yylen;
1373
+
1374
+
1375
+ YY_STACK_PRINT (yyss, yyssp);
1376
+
1377
+ *++yyvsp = yyval;
1378
+
1379
+
1380
+ /* Now `shift' the result of the reduction. Determine what state
1381
+ that goes to, based on the state we popped back to and the rule
1382
+ number reduced by. */
1383
+
1384
+ yyn = yyr1[yyn];
1385
+
1386
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
1387
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
1388
+ yystate = yytable[yystate];
1389
+ else
1390
+ yystate = yydefgoto[yyn - YYNTOKENS];
1391
+
1392
+ goto yynewstate;
1393
+
1394
+
1395
+ /*------------------------------------.
1396
+ | yyerrlab -- here on detecting error |
1397
+ `------------------------------------*/
1398
+ yyerrlab:
1399
+ /* If not already recovering from an error, report this error. */
1400
+ if (!yyerrstatus)
1401
+ {
1402
+ ++yynerrs;
1403
+ #if YYERROR_VERBOSE
1404
+ yyn = yypact[yystate];
1405
+
1406
+ if (YYPACT_NINF < yyn && yyn < YYLAST)
1407
+ {
1408
+ YYSIZE_T yysize = 0;
1409
+ int yytype = YYTRANSLATE (yychar);
1410
+ const char* yyprefix;
1411
+ char *yymsg;
1412
+ int yyx;
1413
+
1414
+ /* Start YYX at -YYN if negative to avoid negative indexes in
1415
+ YYCHECK. */
1416
+ int yyxbegin = yyn < 0 ? -yyn : 0;
1417
+
1418
+ /* Stay within bounds of both yycheck and yytname. */
1419
+ int yychecklim = YYLAST - yyn;
1420
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
1421
+ int yycount = 0;
1422
+
1423
+ yyprefix = ", expecting ";
1424
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
1425
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
1426
+ {
1427
+ yysize += yystrlen (yyprefix) + yystrlen (yytname [yyx]);
1428
+ yycount += 1;
1429
+ if (yycount == 5)
1430
+ {
1431
+ yysize = 0;
1432
+ break;
1433
+ }
1434
+ }
1435
+ yysize += (sizeof ("syntax error, unexpected ")
1436
+ + yystrlen (yytname[yytype]));
1437
+ yymsg = (char *) YYSTACK_ALLOC (yysize);
1438
+ if (yymsg != 0)
1439
+ {
1440
+ char *yyp = yystpcpy (yymsg, "syntax error, unexpected ");
1441
+ yyp = yystpcpy (yyp, yytname[yytype]);
1442
+
1443
+ if (yycount < 5)
1444
+ {
1445
+ yyprefix = ", expecting ";
1446
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
1447
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
1448
+ {
1449
+ yyp = yystpcpy (yyp, yyprefix);
1450
+ yyp = yystpcpy (yyp, yytname[yyx]);
1451
+ yyprefix = " or ";
1452
+ }
1453
+ }
1454
+ yyerror (qp, yymsg);
1455
+ YYSTACK_FREE (yymsg);
1456
+ }
1457
+ else
1458
+ yyerror (qp, "syntax error; also virtual memory exhausted");
1459
+ }
1460
+ else
1461
+ #endif /* YYERROR_VERBOSE */
1462
+ yyerror (qp, "syntax error");
1463
+ }
1464
+
1465
+
1466
+
1467
+ if (yyerrstatus == 3)
1468
+ {
1469
+ /* If just tried and failed to reuse look-ahead token after an
1470
+ error, discard it. */
1471
+
1472
+ if (yychar <= YYEOF)
1473
+ {
1474
+ /* If at end of input, pop the error token,
1475
+ then the rest of the stack, then return failure. */
1476
+ if (yychar == YYEOF)
1477
+ for (;;)
1478
+ {
1479
+
1480
+ YYPOPSTACK;
1481
+ if (yyssp == yyss)
1482
+ YYABORT;
1483
+ yydestruct ("Error: popping",
1484
+ yystos[*yyssp], yyvsp);
1485
+ }
1486
+ }
1487
+ else
1488
+ {
1489
+ yydestruct ("Error: discarding", yytoken, &yylval);
1490
+ yychar = YYEMPTY;
1491
+ }
1492
+ }
1493
+
1494
+ /* Else will try to reuse look-ahead token after shifting the error
1495
+ token. */
1496
+ goto yyerrlab1;
1497
+
1498
+
1499
+ /*---------------------------------------------------.
1500
+ | yyerrorlab -- error raised explicitly by YYERROR. |
1501
+ `---------------------------------------------------*/
1502
+ yyerrorlab:
1503
+
1504
+ #ifdef __GNUC__
1505
+ /* Pacify GCC when the user code never invokes YYERROR and the label
1506
+ yyerrorlab therefore never appears in user code. */
1507
+ if (0)
1508
+ goto yyerrorlab;
1509
+ #endif
1510
+
1511
+ yyvsp -= yylen;
1512
+ yyssp -= yylen;
1513
+ yystate = *yyssp;
1514
+ goto yyerrlab1;
1515
+
1516
+
1517
+ /*-------------------------------------------------------------.
1518
+ | yyerrlab1 -- common code for both syntax error and YYERROR. |
1519
+ `-------------------------------------------------------------*/
1520
+ yyerrlab1:
1521
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
1522
+
1523
+ for (;;)
1524
+ {
1525
+ yyn = yypact[yystate];
1526
+ if (yyn != YYPACT_NINF)
1527
+ {
1528
+ yyn += YYTERROR;
1529
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
1530
+ {
1531
+ yyn = yytable[yyn];
1532
+ if (0 < yyn)
1533
+ break;
1534
+ }
1535
+ }
1536
+
1537
+ /* Pop the current state because it cannot handle the error token. */
1538
+ if (yyssp == yyss)
1539
+ YYABORT;
1540
+
1541
+
1542
+ yydestruct ("Error: popping", yystos[yystate], yyvsp);
1543
+ YYPOPSTACK;
1544
+ yystate = *yyssp;
1545
+ YY_STACK_PRINT (yyss, yyssp);
1546
+ }
1547
+
1548
+ if (yyn == YYFINAL)
1549
+ YYACCEPT;
1550
+
1551
+ *++yyvsp = yylval;
1552
+
1553
+
1554
+ /* Shift the error token. */
1555
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
1556
+
1557
+ yystate = yyn;
1558
+ goto yynewstate;
1559
+
1560
+
1561
+ /*-------------------------------------.
1562
+ | yyacceptlab -- YYACCEPT comes here. |
1563
+ `-------------------------------------*/
1564
+ yyacceptlab:
1565
+ yyresult = 0;
1566
+ goto yyreturn;
1567
+
1568
+ /*-----------------------------------.
1569
+ | yyabortlab -- YYABORT comes here. |
1570
+ `-----------------------------------*/
1571
+ yyabortlab:
1572
+ yydestruct ("Error: discarding lookahead",
1573
+ yytoken, &yylval);
1574
+ yychar = YYEMPTY;
1575
+ yyresult = 1;
1576
+ goto yyreturn;
1577
+
1578
+ #ifndef yyoverflow
1579
+ /*----------------------------------------------.
1580
+ | yyoverflowlab -- parser overflow comes here. |
1581
+ `----------------------------------------------*/
1582
+ yyoverflowlab:
1583
+ yyerror (qp, "parser stack overflow");
1584
+ yyresult = 2;
1585
+ /* Fall through. */
1586
+ #endif
1587
+
1588
+ yyreturn:
1589
+ #ifndef yyoverflow
1590
+ if (yyss != yyssa)
1591
+ YYSTACK_FREE (yyss);
1592
+ #endif
1593
+ return yyresult;
1594
+ }
1596
+
1597
+
1598
+ #line 146 "src/query_parser/q_parser.y"
1599
+
1600
+
1601
+ const char *special_char = "&:()[]{}!+\"~^-|<>=*?";
1602
+ const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
1603
+
1604
+ int get_word(YYSTYPE *lvalp, QParser *qp)
1605
+ {
1606
+ bool is_wild = false;
1607
+ int len;
1608
+ char c;
1609
+ char *buf = qp->buf[qp->buf_index];
1610
+ char *bufp = buf;
1611
+ qp->buf_index = (qp->buf_index + 1) % CONC_WORDS;
1612
+
1613
+ qp->qstrp--; /* need to back up one character */
1614
+
1615
+ while (!strchr(not_word, (c=*qp->qstrp++))) {
1616
+ switch (c) {
1617
+ case '\\':
1618
+ if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1619
+ *bufp++ = '\\';
1620
+ } else {
1621
+ *bufp++ = c;
1622
+ qp->qstrp++;
1623
+ }
1624
+ break;
1625
+ case '*': case '?':
1626
+ is_wild = true;
1627
+ default:
1628
+ *bufp++ = c;
1629
+ }
1630
+ }
1631
+ qp->qstrp--;
1632
+ /* check for keywords. There are only four so we have a bit of a hack which
1633
+ * just checks for all of them. */
1634
+ *bufp = '\0';
1635
+ len = bufp - buf;
1636
+ if (len == 3) {
1637
+ if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1638
+ if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1639
+ if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1640
+ }
1641
+ if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1642
+
1643
+ /* found a word so return it. */
1644
+ lvalp->str = buf;
1645
+ if (is_wild) return WILD_STR;
1646
+ return WORD;
1647
+ }
1648
+
1649
+ int yylex(YYSTYPE *lvalp, QParser *qp)
1650
+ {
1651
+ char c, nc;
1652
+
1653
+ while ((c=*qp->qstrp++) == ' ' || c == '\t')
1654
+ ;
1655
+ if (c == '\0')
1656
+ return 0;
1657
+
1658
+ if (strchr(special_char, c)) { /* comment */
1659
+ nc = *qp->qstrp;
1660
+ switch (c) {
1661
+ case '-': case '!': return NOT;
1662
+ case '+': return REQ;
1663
+ case '*':
1664
+ if (nc == ':') return c;
1665
+ break;
1666
+ case '&':
1667
+ if (nc == '&') {
1668
+ qp->qstrp++;
1669
+ return AND;
1670
+ }
1671
+ break; /* Don't return single & character. Use in word. */
1672
+ case '|':
1673
+ if (nc == '|') {
1674
+ qp->qstrp++;
1675
+ return OR;
1676
+ }
1677
+ default:
1678
+ return c;
1679
+ }
1680
+ }
1681
+
1682
+ return get_word(lvalp, qp);
1683
+ }
1684
+
1685
+ int yyerror(QParser *qp, char const *msg)
1686
+ {
1687
+ if (!qp->handle_parse_errors) {
1688
+ if (qp->clean_str) free(qp->qstr);
1689
+ eprintf(PARSE_ERROR, msg);
1690
+ }
1691
+ return 0;
1692
+ }
1693
+
1694
+
1695
+ Query *get_bool_q(Array *bclauses)
1696
+ {
1697
+ Query *q;
1698
+ BooleanQuery *bq;
1699
+ BooleanClause *bc;
1700
+
1701
+ if (bclauses->size == 0) {
1702
+ ary_destroy(bclauses);
1703
+ q = NULL;
1704
+ } else if (bclauses->size == 1) {
1705
+ bc = (BooleanClause *)bclauses->elems[0];
1706
+ q = bc->query;
1707
+ free(bc);
1708
+ ary_destroy(bclauses);
1709
+ } else {
1710
+ q = bq_create(false);
1711
+ /* copy clauses into query */
1712
+ bq = (BooleanQuery *)q->data;
1713
+ bq->clause_cnt = bclauses->size;
1714
+ bq->clause_capa = bclauses->allocated;
1715
+ free(bq->clauses);
1716
+ bq->clauses = (BooleanClause **)bclauses->elems;
1717
+ free(bclauses);
1718
+ }
1719
+ return q;
1720
+ }
1721
+
1722
+
1723
+ Array *first_cls(BooleanClause *cls)
1724
+ {
1725
+ Array *clauses = ary_create(0, NULL);
1726
+ if (cls) ary_append(clauses, cls);
1727
+ return clauses;
1728
+ }
1729
+
1730
+ Array *add_and_cls(Array *clauses, BooleanClause *clause)
1731
+ {
1732
+ if (clause) {
1733
+ BooleanClause *last_cl;
1734
+ if (clauses->size == 1) {
1735
+ last_cl = clauses->elems[0];
1736
+ if (!last_cl->is_prohibited) bc_set_occur(last_cl, BC_MUST);
1737
+ }
1738
+
1739
+ if (!clause->is_prohibited) bc_set_occur(clause, BC_MUST);
1740
+ ary_append(clauses, clause);
1741
+ }
1742
+ return clauses;
1743
+ }
1744
+
1745
+ Array *add_or_cls(Array *clauses, BooleanClause *clause)
1746
+ {
1747
+ if (clause) ary_append(clauses, clause);
1748
+ return clauses;
1749
+ }
1750
+
1751
+ Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *clause)
1752
+ {
1753
+ if (qp->or_default) {
1754
+ add_or_cls(clauses, clause);
1755
+ } else {
1756
+ add_and_cls(clauses, clause);
1757
+ }
1758
+ return clauses;
1759
+ }
1760
+
1761
+ BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1762
+ {
1763
+ if (q) return bc_create(q, occur);
1764
+ else return NULL;
1765
+ }
1766
+
1767
+ Query *get_term_q(QParser *qp, char *field, char *word)
1768
+ {
1769
+ Query *q;
1770
+ Token *token;
1771
+ TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1772
+
1773
+ if ((token = ts_next(stream)) == NULL) {
1774
+ q = NULL;
1775
+ } else {
1776
+ Term *term = term_create(field, token->text);
1777
+ if ((token = ts_next(stream)) == NULL) {
1778
+ q = tq_create(term);
1779
+ } else {
1780
+ q = phq_create();
1781
+ phq_add_term(q, term, 0);
1782
+ do {
1783
+ phq_add_term(q, term_create(field, token->text), token->pos_inc);
1784
+ } while ((token = ts_next(stream)) != NULL);
1785
+ }
1786
+ }
1787
+ return q;
1788
+ }
1789
+
1790
+ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1791
+ {
1792
+ Query *q;
1793
+ Token *token;
1794
+ TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1795
+
1796
+ if ((token = ts_next(stream)) == NULL) {
1797
+ q = NULL;
1798
+ } else {
1799
+ /* it only makes sense to find one term in a fuzzy query */
1800
+ Term *term = term_create(field, token->text);
1801
+ if (slop_str) {
1802
+ float slop;
1803
+ sscanf(slop_str, "%f", &slop);
1804
+ q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
1805
+ } else {
1806
+ q = fuzq_create(term);
1807
+ }
1808
+ }
1809
+ return q;
1810
+ }
1811
+
1812
+ Query *get_wild_q(QParser *qp, char *field, char *pattern)
1813
+ {
1814
+ if (qp->wild_lower) lower_str(pattern);
1815
+ return wcq_create(term_create(field, pattern));;
1816
+ }
1817
+
1818
+ HashSet *add_field(QParser *qp, char *field)
1819
+ {
1820
+ char *orig_field;
1821
+ if ((orig_field = hs_orig(qp->all_fields, field)) != NULL) {
1822
+ hs_add(qp->fields, orig_field);
1823
+ } else if (qp->allow_any_fields) {
1824
+ field = estrdup(field);
1825
+ hs_add(qp->all_fields, field);
1826
+ hs_add(qp->fields, field);
1827
+ }
1828
+ return qp->fields;
1829
+ }
1830
+
1831
+ HashSet *first_field(QParser *qp, char *field)
1832
+ {
1833
+ qp->fields = qp->fields_buf;
1834
+ qp->fields->size = 0;
1835
+ h_clear(qp->fields->ht);
1836
+ return add_field(qp, field);
1837
+ }
1838
+
1839
+ void ph_destroy(Phrase *self)
1840
+ {
1841
+ int i, j;
1842
+ for (i = 0; i < self->cnt; i++) {
1843
+ for (j = 0; j < self->w_cnt[i]; j++) {
1844
+ free(self->words[i][j]);
1845
+ }
1846
+ free(self->words[i]);
1847
+ }
1848
+ free(self->words);
1849
+ free(self->w_cnt);
1850
+ free(self->w_capa);
1851
+ free(self);
1852
+ }
1853
+
1854
+
1855
+ Phrase *ph_create()
1856
+ {
1857
+ Phrase *self = ALLOC(Phrase);
1858
+ self->cnt = 0;
1859
+ self->capa = PHRASE_INIT_CAPA;
1860
+ self->words = ALLOC_N(char **, PHRASE_INIT_CAPA);
1861
+ self->w_cnt = ALLOC_N(int, PHRASE_INIT_CAPA);
1862
+ self->w_capa = ALLOC_N(int, PHRASE_INIT_CAPA);
1863
+ return self;
1864
+ }
1865
+
1866
+ Phrase *ph_first_word(char *word)
1867
+ {
1868
+ Phrase *self = ph_create();
1869
+ if (word) { /* no point in adding NULL in start */
1870
+ self->words[0] = ALLOC(char *);
1871
+ self->words[0][0] = estrdup(word);
1872
+ self->w_cnt[0] = self->w_capa[0] = 1;
1873
+ self->cnt = 1;
1874
+ }
1875
+ return self;
1876
+ }
1877
+
1878
+ Phrase *ph_add_word(Phrase *self, char *word)
1879
+ {
1880
+ int i;
1881
+ if (self->cnt == self->capa) {
1882
+ self->capa <<= 1;
1883
+ REALLOC_N(self->words, char **, self->capa);
1884
+ REALLOC_N(self->w_cnt, int, self->capa);
1885
+ REALLOC_N(self->w_capa, int, self->capa);
1886
+ }
1887
+ i = self->cnt;
1888
+ self->cnt++;
1889
+ self->words[i] = ALLOC(char *);
1890
+ self->words[i][0] = word ? estrdup(word) : NULL;
1891
+ self->w_cnt[i] = self->w_capa[i] = 1;
1892
+ return self;
1893
+ }
1894
+
1895
+ Phrase *ph_add_multi_word(Phrase *self, char *word)
1896
+ {
1897
+ int i = self->cnt - 1;
1898
+
1899
+ if (!word) return self; /* no point in adding NULL in multi */
1900
+
1901
+ if (self->w_cnt[i] >= self->w_capa[i]) {
1902
+ self->w_capa[i] <<= 1;
1903
+ REALLOC_N(self->words[i], char *, self->w_capa[i]);
1904
+ }
1905
+ self->words[i][self->w_cnt[i]] = estrdup(word);
1906
+ self->w_cnt[i]++;
1907
+ return self;
1908
+ }
1909
+
1910
+ Query *get_normal_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
1911
+ {
1912
+ int pos_inc = 0;
1913
+ int i;
1914
+ Token *token;
1915
+ TokenStream *stream;
1916
+ char *word;
1917
+
1918
+ Query *pq = phq_create();
1919
+ ((PhraseQuery *)pq->data)->slop = slop;
1920
+
1921
+ for (i = 0; i < phrase->cnt; i++) {
1922
+ word = phrase->words[i][0];
1923
+ if (!word) {
1924
+ pos_inc++;
1925
+ } else {
1926
+ stream = a_get_ts(qp->analyzer, field, word);
1927
+ while ((token = ts_next(stream))) {
1928
+ phq_add_term(pq, term_create(field, token->text),
1929
+ token->pos_inc + pos_inc);
1930
+ pos_inc = 0;
1931
+ }
1932
+ }
1933
+ }
1934
+ return pq;
1935
+ }
1936
+
1937
+ Query *get_multi_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
1938
+ {
1939
+ int i, j;
1940
+ int pos_inc = 0;
1941
+ Token *token;
1942
+ TokenStream *stream;
1943
+ char *word;
1944
+ Term **terms = NULL;
1945
+ int t_cnt;
1946
+
1947
+ Query *mpq = mphq_create();
1948
+ ((MultiPhraseQuery *)mpq->data)->slop = slop;
1949
+
1950
+ for (i = 0; i < phrase->cnt; i++) {
1951
+ word = phrase->words[i][0];
1952
+ if (!word) {
1953
+ pos_inc++;
1954
+ } else {
1955
+ t_cnt = phrase->w_cnt[i];
1956
+ if (t_cnt > 1) {
1957
+ terms = ALLOC_N(Term *, t_cnt);
1958
+ for (j = 0; j < t_cnt; j++) {
1959
+ word = phrase->words[i][j];
1960
+ stream = a_get_ts(qp->analyzer, field, word);
1961
+ if ((token = ts_next(stream))) {
1962
+ terms[j] = term_create(field, token->text);
1963
+ } else {
1964
+ t_cnt--; j--;
1965
+ }
1966
+ }
1967
+ /* must advance at least one */
1968
+ mphq_add_terms(mpq, terms, t_cnt, pos_inc + 1);
1969
+ } else {
1970
+ stream = a_get_ts(qp->analyzer, field, word);
1971
+ while ((token = ts_next(stream))) {
1972
+ terms = ALLOC(Term *);
1973
+ terms[0] = term_create(field, token->text);
1974
+ mphq_add_terms(mpq, terms, 1, token->pos_inc + pos_inc);
1975
+ pos_inc = 0;
1976
+ }
1977
+ }
1978
+ }
1979
+ }
1980
+ return mpq;
1981
+ }
1982
+
1983
+ Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
1984
+ {
1985
+ Query *q;
1986
+ int i, j;
1987
+ int slop;
1988
+
1989
+ if (phrase->cnt == 0) {
1990
+ q = NULL;
1991
+ } else if (phrase->cnt == 1) {
1992
+ if (phrase->w_cnt[0] == 1) {
1993
+ FLDS(q, get_term_q(qp, field, phrase->words[0][0]));
1994
+ } else {
1995
+ Query *bq;
1996
+ q = bq_create(false);
1997
+ for (j = 0; j < phrase->w_cnt[0]; j++) {
1998
+ FLDS(bq, tq_create(term_create(field, phrase->words[0][j])));
1999
+ if (bq) bq_add_query(q, bq, BC_SHOULD);
2000
+ }
2001
+ }
2002
+ } else {
2003
+ bool multi_phrase = false;
2004
+ for (i = 0; i < phrase->cnt; i++) {
2005
+ if (phrase->w_cnt[i] > 1) multi_phrase = true;
2006
+ }
2007
+ slop = qp->def_slop;
2008
+ if (slop_str) sscanf(slop_str, "%d", &slop);
2009
+ if (multi_phrase) {
2010
+ FLDS(q, get_multi_phrase_query(qp, field, phrase, slop));
2011
+ } else {
2012
+ FLDS(q, get_normal_phrase_query(qp, field, phrase, slop));
2013
+ }
2014
+ }
2015
+ ph_destroy(phrase);
2016
+ return q;
2017
+ }
2018
+
2019
+ Query *get_range_q(char *field, char *from, char *to, bool inc_lower, bool inc_upper)
2020
+ {
2021
+ return rq_create(field, from, to, inc_lower, inc_upper);
2022
+ }
2023
+
2024
+ void qp_destroy(void *p)
2025
+ {
2026
+ QParser *self = (QParser *)p;
2027
+ if (self->close_def_fields) hs_destroy_all(self->def_fields);
2028
+ hs_destroy_all(self->all_fields);
2029
+ hs_destroy(self->fields_buf);
2030
+ free(self);
2031
+ }
2032
+
2033
+ QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2034
+ {
2035
+ int i;
2036
+ QParser *self = ALLOC(QParser);
2037
+ self->or_default = true;
2038
+ self->wild_lower = true;
2039
+ self->clean_str = false;
2040
+ self->handle_parse_errors = false;
2041
+ self->allow_any_fields = false;
2042
+ self->def_slop = 0;
2043
+ self->fields_buf = hs_str_create(NULL);
2044
+ self->all_fields = all_fields;
2045
+ if (def_fields) {
2046
+ self->def_fields = def_fields;
2047
+ for (i = 0; i < self->def_fields->size; i++) {
2048
+ if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2049
+ hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2050
+ }
2051
+ }
2052
+ self->close_def_fields = true;
2053
+ } else {
2054
+ self->def_fields = all_fields;
2055
+ self->close_def_fields = false;
2056
+ }
2057
+ self->fields = self->def_fields;
2058
+ /* make sure all_fields contains the default fields */
2059
+ self->analyzer = analyzer;
2060
+ self->buf_index = 0;
2061
+ return self;
2062
+ }
2063
+
2064
/* these chars have meaning within phrases */
static const char *PHRASE_CHARS = "<>|\"";

/*
 * Insert +chr+ at the front of +str+ by shifting the first +len+ bytes one
 * place to the right.  The caller must make sure the buffer has room for
 * len + 1 bytes.
 */
void str_insert(char *str, int len, char chr)
{
    char *shifted = str + 1;

    /* source and destination overlap, so memmove is required */
    memmove(shifted, str, (size_t)len * sizeof(char));
    str[0] = chr;
}
2072
+
2073
+ char *qp_clean_str(char *str)
2074
+ {
2075
+ int b, pb = -1;
2076
+ int br_cnt = 0;
2077
+ bool quote_open = false;
2078
+ char *sp, *nsp;
2079
+
2080
+ /* leave a little extra */
2081
+ char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2082
+
2083
+ for (sp = str, nsp = new_str; *sp; sp++) {
2084
+ b = *sp;
2085
+ /* ignore escaped characters */
2086
+ if (pb == '\\') {
2087
+ if (quote_open && index(PHRASE_CHARS, b)) {
2088
+ *nsp++ = '\\'; /* this was left off the first time through */
2089
+ }
2090
+
2091
+ *nsp++ = b;
2092
+ /* \\ has escaped itself so has no power. Assign pb random char : */
2093
+ pb = ((b == '\\') ? ':' : b);
2094
+ continue;
2095
+ }
2096
+ switch (b) {
2097
+ case '\\':
2098
+ if (!quote_open) /* We do our own escaping below */
2099
+ *nsp++ = b;
2100
+ break;
2101
+ case '"':
2102
+ quote_open = !quote_open;
2103
+ *nsp++ = b;
2104
+ break;
2105
+ case '(':
2106
+ if (!quote_open) {
2107
+ br_cnt++;
2108
+ } else {
2109
+ *nsp++ = '\\';
2110
+ }
2111
+ *nsp++ = b;
2112
+ break;
2113
+ case ')':
2114
+ if (!quote_open) {
2115
+ if (br_cnt == 0) {
2116
+ str_insert(new_str, nsp - new_str, '(');
2117
+ nsp++;
2118
+ } else {
2119
+ br_cnt--;
2120
+ }
2121
+ } else {
2122
+ *nsp++ = '\\';
2123
+ }
2124
+ *nsp++ = b;
2125
+ break;
2126
+ case '>':
2127
+ if (quote_open) {
2128
+ if (pb == '<') {
2129
+ /* remove the escape character */
2130
+ nsp--;
2131
+ nsp[-1] = '<';
2132
+ } else {
2133
+ *nsp++ = '\\';
2134
+ }
2135
+ }
2136
+ *nsp++ = b;
2137
+ break;
2138
+ default:
2139
+ if (quote_open) {
2140
+ if (index(special_char, b) && b != '|') {
2141
+ *nsp++ = '\\';
2142
+ }
2143
+ }
2144
+ *nsp++ = b;
2145
+ }
2146
+ pb = b;
2147
+ }
2148
+ if (quote_open) *nsp++ = '"';
2149
+ for (;br_cnt > 0; br_cnt--) {
2150
+ *nsp++ = ')';
2151
+ }
2152
+ *nsp = '\0';
2153
+ return new_str;
2154
+ }
2155
+
2156
+ Query *qp_get_bad_query(QParser *qp, char *str)
2157
+ {
2158
+ Query *q;
2159
+ FLDS(q, get_term_q(qp, field, str));
2160
+ return q;
2161
+ }
2162
+
2163
+ Query *qp_parse(QParser *self, char *qstr)
2164
+ {
2165
+ if (self->clean_str) {
2166
+ self->qstrp = self->qstr = qp_clean_str(qstr);
2167
+ } else {
2168
+ self->qstrp = self->qstr = qstr;
2169
+ }
2170
+ self->fields = self->def_fields;
2171
+ self->result = NULL;
2172
+ yyparse(self);
2173
+ if (!self->result && self->handle_parse_errors)
2174
+ self->result = qp_get_bad_query(self, self->qstr);
2175
+ if (!self->result) self->result = bq_create(false);
2176
+ if (self->clean_str) free(self->qstr);
2177
+ return self->result;
2178
+ }
2179
+
2180
+