isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,1366 @@
1
+ /*****************************************************************************
2
+ * QueryParser
3
+ * ===========
4
+ *
5
+ * Brief Overview
6
+ * --------------
7
+ *
8
+ * === Creating a QueryParser
9
+ *
10
+ * +qp_new+ allocates a new QueryParser and assigns three very important
11
+ * HashSets; +qp->def_fields+, +qp->tkz_fields+ and +qp->all_fields+. The
12
+ * query language allows you to assign a field or a set of fields to each
13
+ * part of the query.
14
+ *
15
+ * - +qp->def_fields+ is the set of fields that a query is applied to by
16
+ * default when no fields are specified.
17
+ * - +qp->all_fields+ is the set of fields that gets searched when the user
18
+ * requests a search of all fields.
19
+ * - +qp->tkz_fields+ is the set of fields that gets tokenized before being
20
+ * added to the query parser.
21
+ *
22
+ * === qp_parse
23
+ *
24
+ * The main QueryParser method is +qp_parse+. It gets called with the query
25
+ * string and returns a Query object which can then be passed to the
26
+ * IndexSearcher. The first thing it does is to clean the query string if
27
+ * +qp->clean_str+ is set to true. The cleaning is done with the
28
+ * +qp_clean_str+.
29
+ *
30
+ * It then calls the yacc parser which will set +qp->result+ to the parsed
31
+ * query. If parsing fails in any way, +qp->result+ should be set to NULL, in
32
+ * which case qp_parse does one of two things depending on the value of
33
+ * +qp->handle_parse_errors+;
34
+ *
35
+ * - If it is set to true, qp_parse attempts to do a very basic parsing of
36
+ * the query by ignoring all special characters and parsing the query as
37
+ * a plain boolean query.
38
+ * - If it is set to false, qp_parse will raise a PARSE_ERROR and hopefully
39
+ * free all allocated memory.
40
+ *
41
+ * === The Lexer
42
+ *
43
+ * +yylex+ is the lexing method called by the QueryParser. It breaks the
44
+ * query up into special characters;
45
+ *
46
+ * ( "&:()[]{}!\"~^|<>=*?+-" )
47
+ *
48
+ * and tokens;
49
+ *
50
+ * - QWRD
51
+ * - WILD_STR
52
+ * - AND['AND', '&&']
53
+ * - OR['OR', '||']
54
+ * - REQ['REQ', '+']
55
+ * - NOT['NOT', '-', '!']
56
+ *
57
+ * QWRD tokens are query word tokens which are made up of characters other
58
+ * than the special characters. They can also contain special characters when
59
+ * escaped with a backslash '\'. WILD_STR is the same as QWRD except that it
60
+ * may also contain '?' and '*' characters.
61
+ *
62
+ * === The Parser
63
+ *
64
+ * For a better understanding of how the query parser works, it is a good
65
+ * idea to study the Ferret Query Language (FQL) described below. Once you
66
+ * understand FQL the one tricky part that needs to be mentioned is how
67
+ * fields are handled. This is where +qp->def_fields+ and +qp->all_fields+
68
+ * come into play. When no fields are specified then the default fields are
69
+ * used. The '*:' field specifier will search all fields contained in the
70
+ * all_fields set. Otherwise all fields specified in the field descriptor
71
+ * separated by '|' will be searched. For example 'title|content:' will
72
+ * search the title and content fields. When fields are specified like this,
73
+ * the parser will push the fields onto a stack and all queries modified by
74
+ * the field specifier will be applied to the fields on top of the stack.
75
+ * The parser uses the FLDS macro to handle the current fields. It takes the
76
+ * current query building function in the parser and calls it for all the
77
+ * current search fields (on top of the stack).
78
+ *
79
+ * Ferret Query Language (FQL)
80
+ * ---------------------------
81
+ *
82
+ * FIXME to be continued...
83
+ *****************************************************************************/
84
+ %{
85
+ #include <string.h>
86
+ #include <ctype.h>
87
+ #include <wctype.h>
88
+ #include <assert.h>
89
+ #include "frt_global.h"
90
+ #include "frt_except.h"
91
+ #include "frt_search.h"
92
+ #include "frt_array.h"
93
+ #include "frt_internal.h"
94
+
95
+ typedef struct Phrase {
96
+ int size;
97
+ int capa;
98
+ int pos_inc;
99
+ FrtPhrasePosition *positions;
100
+ } Phrase;
101
+
102
+ #define BCA_INIT_CAPA 4
103
+ typedef struct BCArray {
104
+ int size;
105
+ int capa;
106
+ BooleanClause **clauses;
107
+ } BCArray;
108
+
109
+ float qp_default_fuzzy_min_sim = 0.5;
110
+ int qp_default_fuzzy_pre_len = 0;
111
+
112
+ %}
113
+ %union {
114
+ FrtQuery *query;
115
+ BooleanClause *bcls;
116
+ BCArray *bclss;
117
+ FrtHashSet *hashset;
118
+ Phrase *phrase;
119
+ char *str;
120
+ }
121
+ %{
122
+ static int yylex(YYSTYPE *lvalp, FrtQParser *qp);
123
+ static int yyerror(QParser *qp, char const *msg);
124
+
125
+ #define PHRASE_INIT_CAPA 4
126
+ static FrtQuery *get_bool_q(BCArray *bca);
127
+
128
+ static BCArray *first_cls(BooleanClause *boolean_clause);
129
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
130
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
131
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
132
+ BooleanClause *clause);
133
+ static void bca_destroy(BCArray *bca);
134
+
135
+ static BooleanClause *get_bool_cls(FrtQuery *q, BCType occur);
136
+
137
+ static FrtQuery *get_term_q(QParser *qp, FrtSymbol field, char *word);
138
+ static FrtQuery *get_fuzzy_q(QParser *qp, FrtSymbol field, char *word,
139
+ char *slop);
140
+ static FrtQuery *get_wild_q(QParser *qp, FrtSymbol field, char *pattern);
141
+
142
+ static FrtHashSet *first_field(QParser *qp, const char *field);
143
+ static FrtHashSet *add_field(QParser *qp, const char *field);
144
+
145
+ static FrtQuery *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
146
+
147
+ static Phrase *ph_first_word(char *word);
148
+ static Phrase *ph_add_word(Phrase *self, char *word);
149
+ static Phrase *ph_add_multi_word(Phrase *self, char *word);
150
+ static void ph_destroy(Phrase *self);
151
+
152
+ static FrtQuery *get_r_q(QParser *qp, FrtSymbol field, char *from, char *to,
153
+ bool inc_lower, bool inc_upper);
154
+
155
+ static void qp_push_fields(QParser *self, FrtHashSet *fields, bool destroy);
156
+ static void qp_pop_fields(QParser *self);
157
+
158
+ /**
159
+ * +FLDS+ calls +func+ for all fields on top of the field stack. +func+
160
+ * must return a query. If there is more than one field on top of FieldStack
161
+ * then +FLDS+ will combine all the queries returned by +func+ into a single
162
+ * BooleanQuery which it then assigns to +q+. If there is only one field, the
163
+ * return value of +func+ is assigned to +q+ directly.
164
+ */
165
+ #define FLDS(q, func) do {\
166
+ FRT_TRY {\
167
+ FrtSymbol field;\
168
+ if (qp->fields->size == 0) {\
169
+ q = NULL;\
170
+ } else if (qp->fields->size == 1) {\
171
+ field = (Symbol)qp->fields->first->elem;\
172
+ q = func;\
173
+ } else {\
174
+ FrtQuery *volatile sq; FrtHashSetEntry *volatile hse;\
175
+ q = bq_new_max(false, qp->max_clauses);\
176
+ for (hse = qp->fields->first; hse; hse = hse->next) {\
177
+ field = (Symbol)hse->elem;\
178
+ sq = func;\
179
+ FRT_TRY\
180
+ if (sq) frt_bq_add_query_nr(q, sq, FRT_BC_SHOULD);\
181
+ FRT_XCATCHALL\
182
+ if (sq) frt_q_deref(sq);\
183
+ FRT_XENDTRY\
184
+ }\
185
+ if (((FrtBooleanQuery *)q)->clause_cnt == 0) {\
186
+ frt_q_deref(q);\
187
+ q = NULL;\
188
+ }\
189
+ }\
190
+ } FRT_XCATCHALL\
191
+ qp->destruct = true;\
192
+ FRT_HANDLED();\
193
+ FRT_XENDTRY\
194
+ if (qp->destruct && !qp->recovering && q) {q_deref(q); q = NULL;}\
195
+ } while (0)
196
+
197
+ #define Y if (qp->destruct) goto yyerrorlab;
198
+ #define T FRT_TRY
199
+ #define E\
200
+ FRT_XCATCHALL\
201
+ qp->destruct = true;\
202
+ FRT_HANDLED();\
203
+ FRT_XENDTRY\
204
+ if (qp->destruct) Y;
205
+ %}
206
+ %expect 1
207
+ %pure-parser
208
+ %parse-param { FrtQParser *qp }
209
+ %lex-param { FrtQParser *qp }
210
+ %token <str> QWRD WILD_STR
211
+ %type <query> q bool_q boosted_q term_q wild_q field_q phrase_q range_q
212
+ %type <bcls> bool_cls
213
+ %type <bclss> bool_clss
214
+ %type <hashset> field
215
+ %type <phrase> ph_words
216
+ %nonassoc LOW
217
+ %left AND OR
218
+ %nonassoc REQ NOT
219
+ %left ':'
220
+ %nonassoc HIGH
221
+ %destructor { if ($$ && qp->destruct) frt_q_deref($$); } q bool_q boosted_q term_q wild_q field_q phrase_q range_q
222
+ %destructor { if ($$ && qp->destruct) bc_deref($$); } bool_cls
223
+ %destructor { if ($$ && qp->destruct) bca_destroy($$); } bool_clss
224
+ %destructor { if ($$ && qp->destruct) ph_destroy($$); } ph_words
225
+ %%
226
+ bool_q : /* Nothing */ { qp->result = $$ = NULL; }
227
+ | bool_clss { T qp->result = $$ = get_bool_q($1); E }
228
+ ;
229
+ bool_clss : bool_cls { T $$ = first_cls($1); E }
230
+ | bool_clss AND bool_cls { T $$ = add_and_cls($1, $3); E }
231
+ | bool_clss OR bool_cls { T $$ = add_or_cls($1, $3); E }
232
+ | bool_clss bool_cls { T $$ = add_default_cls(qp, $1, $2); E }
233
+ ;
234
+ bool_cls : REQ boosted_q { T $$ = get_bool_cls($2, FRT_BC_MUST); E }
235
+ | NOT boosted_q { T $$ = get_bool_cls($2, FRT_BC_MUST_NOT); E }
236
+ | boosted_q { T $$ = get_bool_cls($1, FRT_BC_SHOULD); E }
237
+ ;
238
+ boosted_q : q
239
+ | q '^' QWRD { T if ($1) sscanf($3,"%f",&($1->boost)); $$=$1; E }
240
+ ;
241
+ q : term_q
242
+ | '(' ')' { T $$ = bq_new_max(true, qp->max_clauses); E }
243
+ | '(' bool_clss ')' { T $$ = get_bool_q($2); E }
244
+ | field_q
245
+ | phrase_q
246
+ | range_q
247
+ | wild_q
248
+ ;
249
+ term_q : QWRD { FLDS($$, get_term_q(qp, field, $1)); Y}
250
+ | QWRD '~' QWRD %prec HIGH { FLDS($$, get_fuzzy_q(qp, field, $1, $3)); Y}
251
+ | QWRD '~' %prec LOW { FLDS($$, get_fuzzy_q(qp, field, $1, NULL)); Y}
252
+ ;
253
+ wild_q : WILD_STR { FLDS($$, get_wild_q(qp, field, $1)); Y}
254
+ ;
255
+ field_q : field ':' q { qp_pop_fields(qp); }
256
+ { $$ = $3; }
257
+ | '*' { qp_push_fields(qp, qp->all_fields, false); } ':' q { qp_pop_fields(qp); }
258
+ { $$ = $4; }
259
+ ;
260
+ field : QWRD { $$ = first_field(qp, $1); }
261
+ | field '|' QWRD { $$ = add_field(qp, $3);}
262
+ ;
263
+ phrase_q : '"' ph_words '"' { $$ = get_phrase_q(qp, $2, NULL); }
264
+ | '"' ph_words '"' '~' QWRD { $$ = get_phrase_q(qp, $2, $5); }
265
+ | '"' '"' { $$ = NULL; }
266
+ | '"' '"' '~' QWRD { $$ = NULL; (void)$4;}
267
+ ;
268
+ ph_words : QWRD { $$ = ph_first_word($1); }
269
+ | '<' '>' { $$ = ph_first_word(NULL); }
270
+ | ph_words QWRD { $$ = ph_add_word($1, $2); }
271
+ | ph_words '<' '>' { $$ = ph_add_word($1, NULL); }
272
+ | ph_words '|' QWRD { $$ = ph_add_multi_word($1, $3); }
273
+ ;
274
+ range_q : '[' QWRD QWRD ']' { FLDS($$, get_r_q(qp, field, $2, $3, true, true)); Y}
275
+ | '[' QWRD QWRD '}' { FLDS($$, get_r_q(qp, field, $2, $3, true, false)); Y}
276
+ | '{' QWRD QWRD ']' { FLDS($$, get_r_q(qp, field, $2, $3, false, true)); Y}
277
+ | '{' QWRD QWRD '}' { FLDS($$, get_r_q(qp, field, $2, $3, false, false)); Y}
278
+ | '<' QWRD '}' { FLDS($$, get_r_q(qp, field, NULL,$2, false, false)); Y}
279
+ | '<' QWRD ']' { FLDS($$, get_r_q(qp, field, NULL,$2, false, true)); Y}
280
+ | '[' QWRD '>' { FLDS($$, get_r_q(qp, field, $2, NULL,true, false)); Y}
281
+ | '{' QWRD '>' { FLDS($$, get_r_q(qp, field, $2, NULL,false, false)); Y}
282
+ | '<' QWRD { FLDS($$, get_r_q(qp, field, NULL,$2, false, false)); Y}
283
+ | '<' '=' QWRD { FLDS($$, get_r_q(qp, field, NULL,$3, false, true)); Y}
284
+ | '>' '=' QWRD { FLDS($$, get_r_q(qp, field, $3, NULL,true, false)); Y}
285
+ | '>' QWRD { FLDS($$, get_r_q(qp, field, $2, NULL,false, false)); Y}
286
+ ;
287
+ %%
288
+
289
+ static const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
290
+ static const char *not_word = " \t()[]{}!\"~^|<>=";
291
+
292
+ /**
293
+ * +get_word+ gets the next query-word from the query string. A query-word is
294
+ * basically a string of non-special or escaped special characters. It is
295
+ * FrtAnalyzer agnostic. It is up to the get_*_q methods to tokenize the word and
296
+ * turn it into a +Query+. See the documentation for each get_*_q method to
297
+ * see how it handles tokenization.
298
+ *
299
+ * Note that +get_word+ is also responsible for returning field names and
300
+ * matching the special tokens 'AND', 'NOT', 'REQ' and 'OR'.
301
+ */
302
+ static int get_word(YYSTYPE *lvalp, FrtQParser *qp)
303
+ {
304
+ bool is_wild = false;
305
+ int len;
306
+ char c;
307
+ char *buf = qp->buf[qp->buf_index];
308
+ char *bufp = buf;
309
+ qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
310
+
311
+ if (qp->dynbuf) {
312
+ free(qp->dynbuf);
313
+ qp->dynbuf = NULL;
314
+ }
315
+
316
+ qp->qstrp--; /* need to back up one character */
317
+
318
+ while (!strchr(not_word, (c = *qp->qstrp++))) {
319
+ switch (c) {
320
+ case '\\':
321
+ if ((c = *qp->qstrp) == '\0') {
322
+ *bufp++ = '\\';
323
+ }
324
+ else {
325
+ *bufp++ = c;
326
+ qp->qstrp++;
327
+ }
328
+ break;
329
+ case ':':
330
+ if ((*qp->qstrp) == ':') {
331
+ qp->qstrp++;
332
+ *bufp++ = ':';
333
+ *bufp++ = ':';
334
+ }
335
+ else {
336
+ goto get_word_done;
337
+ }
338
+ break;
339
+ case '*': case '?':
340
+ is_wild = true;
341
+ /* fall through */
342
+ default:
343
+ *bufp++ = c;
344
+ }
345
+ /* we've exceeded the static buffer. switch to the dynamic one. The
346
+ * dynamic buffer is allocated enough space to hold the whole query
347
+ * string so it's capacity doesn't need to be checked again once
348
+ * allocated. */
349
+ if (!qp->dynbuf && ((bufp - buf) == MAX_WORD_SIZE)) {
350
+ qp->dynbuf = FRT_ALLOC_AND_ZERO_N(char, strlen(qp->qstr) + 1);
351
+ strncpy(qp->dynbuf, buf, MAX_WORD_SIZE);
352
+ buf = qp->dynbuf;
353
+ bufp = buf + MAX_WORD_SIZE;
354
+ }
355
+ }
356
+ get_word_done:
357
+ qp->qstrp--;
358
+ /* check for keywords. There are only four so we have a bit of a hack
359
+ * which just checks for all of them. */
360
+ *bufp = '\0';
361
+ len = (int)(bufp - buf);
362
+ if (qp->use_keywords) {
363
+ if (len == 3) {
364
+ if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
365
+ if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
366
+ if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
367
+ }
368
+ if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
369
+ }
370
+
371
+ /* found a word so return it. */
372
+ lvalp->str = buf;
373
+ if (is_wild) {
374
+ return WILD_STR;
375
+ }
376
+ return QWRD;
377
+ }
378
+
379
+ /**
380
+ * +yylex+ is the lexing method called by the QueryParser. It breaks the
381
+ * query up into special characters;
382
+ *
383
+ * ( "&:()[]{}!\"~^|<>=*?+-" )
384
+ *
385
+ * and tokens;
386
+ *
387
+ * - QWRD
388
+ * - WILD_STR
389
+ * - AND['AND', '&&']
390
+ * - OR['OR', '||']
391
+ * - REQ['REQ', '+']
392
+ * - NOT['NOT', '-', '~']
393
+ *
394
+ * QWRD tokens are query word tokens which are made up of characters other
395
+ * than the special characters. They can also contain special characters when
396
+ * escaped with a backslash '\'. WILD_STR is the same as QWRD except that it
397
+ * may also contain '?' and '*' characters.
398
+ *
399
+ * If any of the special chars are seen they will usually be returned straight
400
+ * away. The exceptions are the wild chars '*' and '?', and '&' which will be
401
+ * treated as a plain old word character unless followed by another '&'.
402
+ *
403
+ * If no special characters or tokens are found then yylex delegates to
404
+ * +get_word+ which will fetch the next query-word.
405
+ */
406
+ static int yylex(YYSTYPE *lvalp, FrtQParser *qp)
407
+ {
408
+ char c, nc;
409
+
410
+ while ((c=*qp->qstrp++) == ' ' || c == '\t') {
411
+ }
412
+
413
+ if (c == '\0') return 0;
414
+
415
+ if (strchr(special_char, c)) { /* comment */
416
+ nc = *qp->qstrp;
417
+ switch (c) {
418
+ case '-': case '!': return NOT;
419
+ case '+': return REQ;
420
+ case '*':
421
+ if (nc == ':') return c;
422
+ break;
423
+ case '?':
424
+ break;
425
+ case '&':
426
+ if (nc == '&') {
427
+ qp->qstrp++;
428
+ return AND;
429
+ }
430
+ break; /* Don't return single & character. Use in word. */
431
+ case '|':
432
+ if (nc == '|') {
433
+ qp->qstrp++;
434
+ return OR;
435
+ }
436
+ default:
437
+ return c;
438
+ }
439
+ }
440
+
441
+ return get_word(lvalp, qp);
442
+ }
443
+
444
+ /**
445
+ * yyerror gets called if there is an parse error with the yacc parser.
446
+ * It is responsible for clearing any memory that was allocated during the
447
+ * parsing process.
448
+ */
449
+ static int yyerror(QParser *qp, char const *msg)
450
+ {
451
+ qp->destruct = true;
452
+ if (!qp->handle_parse_errors) {
453
+ char buf[1024];
454
+ buf[1023] = '\0';
455
+ strncpy(buf, qp->qstr, 1023);
456
+ if (qp->clean_str) {
457
+ free(qp->qstr);
458
+ }
459
+ frt_mutex_unlock(&qp->mutex);
460
+ snprintf(xmsg_buffer, XMSG_BUFFER_SIZE,
461
+ "couldn't parse query ``%s''. Error message "
462
+ " was %s", buf, (char *)msg);
463
+ }
464
+ while (qp->fields_top->next != NULL) {
465
+ qp_pop_fields(qp);
466
+ }
467
+ return 0;
468
+ }
469
+
470
+ #define BQ(query) ((FrtBooleanQuery *)(query)) /* view a query pointer as a FrtBooleanQuery pointer */
471
+
472
+ /**
473
+ * The QueryParser caches a tokenizer for each field so that it doesn't need
474
+ * to allocate a new tokenizer for each term in the query. This would be quite
475
+ * expensive as tokenizers use quite a large hunk of memory.
476
+ *
477
+ * This method returns the query parser for a particular field and sets it up
478
+ * with the text to be tokenized.
479
+ */
480
+ static FrtTokenStream *get_cached_ts(QParser *qp, FrtSymbol field, char *text)
481
+ {
482
+ FrtTokenStream *ts;
483
+ if (frt_hs_exists(qp->tokenized_fields, field)) {
484
+ ts = (FrtTokenStream *)frt_h_get(qp->ts_cache, field);
485
+ if (!ts) {
486
+ ts = frt_a_get_ts(qp->analyzer, field, text);
487
+ frt_h_set(qp->ts_cache, field, ts);
488
+ }
489
+ else {
490
+ ts->reset(ts, text);
491
+ }
492
+ }
493
+ else {
494
+ ts = qp->non_tokenizer;
495
+ ts->reset(ts, text);
496
+ }
497
+ return ts;
498
+ }
499
+
500
+ /**
501
+ * Turns a BooleanClause array into a BooleanQuery. It will optimize the query
502
+ * if 0 or 1 clauses are present to NULL or the actual query in the clause
503
+ * respectively.
504
+ */
505
+ static FrtQuery *get_bool_q(BCArray *bca)
506
+ {
507
+ FrtQuery *q;
508
+ const int clause_count = bca->size;
509
+
510
+ if (clause_count == 0) {
511
+ q = NULL;
512
+ free(bca->clauses);
513
+ }
514
+ else if (clause_count == 1) {
515
+ BooleanClause *bc = bca->clauses[0];
516
+ if (bc->is_prohibited) {
517
+ q = frt_bq_new(false);
518
+ frt_bq_add_query_nr(q, bc->query, FRT_BC_MUST_NOT);
519
+ frt_bq_add_query_nr(q, frt_maq_new(), FRT_BC_MUST);
520
+ }
521
+ else {
522
+ q = bc->query;
523
+ }
524
+ free(bc);
525
+ free(bca->clauses);
526
+ }
527
+ else {
528
+ q = frt_bq_new(false);
529
+ /* copy clauses into query */
530
+
531
+ BQ(q)->clause_cnt = clause_count;
532
+ BQ(q)->clause_capa = bca->capa;
533
+ free(BQ(q)->clauses);
534
+ BQ(q)->clauses = bca->clauses;
535
+ }
536
+ free(bca);
537
+ return q;
538
+ }
539
+
540
+ /**
541
+ * Base method for appending BooleanClauses to a BooleanClause array. This
542
+ * method doesn't care about the type of clause (MUST, SHOULD, MUST_NOT).
543
+ */
544
+ static void bca_add_clause(BCArray *bca, BooleanClause *clause)
545
+ {
546
+ if (bca->size >= bca->capa) {
547
+ bca->capa <<= 1;
548
+ FRT_REALLOC_N(bca->clauses, BooleanClause *, bca->capa);
549
+ }
550
+ bca->clauses[bca->size] = clause;
551
+ bca->size++;
552
+ }
553
+
554
+ /**
555
+ * Add the first clause to a BooleanClause array. This method is also
556
+ * responsible for allocating a new BooleanClause array.
557
+ */
558
+ static BCArray *first_cls(BooleanClause *clause)
559
+ {
560
+ BCArray *bca = FRT_ALLOC_AND_ZERO(BCArray);
561
+ bca->capa = BCA_INIT_CAPA;
562
+ bca->clauses = FRT_ALLOC_N(BooleanClause *, BCA_INIT_CAPA);
563
+ if (clause) {
564
+ bca_add_clause(bca, clause);
565
+ }
566
+ return bca;
567
+ }
568
+
569
+ /**
570
+ * Add AND clause to the BooleanClause array. The means that it will set the
571
+ * clause being added and the previously added clause from SHOULD clauses to
572
+ * MUST clauses. (If they are currently MUST_NOT clauses they stay as they
573
+ * are.)
574
+ */
575
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
576
+ {
577
+ if (clause) {
578
+ if (bca->size == 1) {
579
+ if (!bca->clauses[0]->is_prohibited) {
580
+ bc_set_occur(bca->clauses[0], FRT_BC_MUST);
581
+ }
582
+ }
583
+ if (!clause->is_prohibited) {
584
+ bc_set_occur(clause, FRT_BC_MUST);
585
+ }
586
+ bca_add_clause(bca, clause);
587
+ }
588
+ return bca;
589
+ }
590
+
591
+ /**
592
+ * Add SHOULD clause to the BooleanClause array.
593
+ */
594
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
595
+ {
596
+ if (clause) {
597
+ bca_add_clause(bca, clause);
598
+ }
599
+ return bca;
600
+ }
601
+
602
+ /**
603
+ * Add AND or OR clause to the BooleanClause array, depending on the default
604
+ * clause type.
605
+ */
606
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
607
+ BooleanClause *clause)
608
+ {
609
+ if (qp->or_default) {
610
+ add_or_cls(bca, clause);
611
+ }
612
+ else {
613
+ add_and_cls(bca, clause);
614
+ }
615
+ return bca;
616
+ }
617
+
618
+ /**
619
+ * destroy array of BooleanClauses
620
+ */
621
+ static void bca_destroy(BCArray *bca)
622
+ {
623
+ int i;
624
+ for (i = 0; i < bca->size; i++) {
625
+ bc_deref(bca->clauses[i]);
626
+ }
627
+ free(bca->clauses);
628
+ free(bca);
629
+ }
630
+
631
+ /**
632
+ * Turn a query into a BooleanClause for addition to a BooleanQuery.
633
+ */
634
+ static BooleanClause *get_bool_cls(FrtQuery *q, BCType occur)
635
+ {
636
+ if (q) {
637
+ return bc_new(q, occur);
638
+ }
639
+ else {
640
+ return NULL;
641
+ }
642
+ }
643
+
644
+ /**
645
+ * Create a TermQuery. The word will be tokenized and if the tokenization
646
+ * produces more than one token, a PhraseQuery will be returned. For example,
647
+ * if the word is dbalmain@gmail.com and a LetterTokenizer is used then a
648
+ * PhraseQuery "dbalmain gmail com" will be returned which is actually exactly
649
+ * what we want as it will match any documents containing the same email
650
+ * address and tokenized with the same tokenizer.
651
+ */
652
+ static FrtQuery *get_term_q(QParser *qp, FrtSymbol field, char *word)
653
+ {
654
+ FrtQuery *q;
655
+ FrtToken *token;
656
+ FrtTokenStream *stream = get_cached_ts(qp, field, word);
657
+
658
+ if ((token = frt_ts_next(stream)) == NULL) {
659
+ q = NULL;
660
+ }
661
+ else {
662
+ q = frt_tq_new(field, token->text);
663
+ if ((token = frt_ts_next(stream)) != NULL) {
664
+ /* Less likely case, destroy the term query and create a
665
+ * phrase query instead */
666
+ FrtQuery *phq = frt_phq_new(field);
667
+ frt_phq_add_term(phq, ((TermQuery *)q)->term, 0);
668
+ q->destroy_i(q);
669
+ q = phq;
670
+ do {
671
+ if (token->pos_inc) {
672
+ frt_phq_add_term(q, token->text, token->pos_inc);
673
+ /* add some slop since single term was expected */
674
+ ((FrtPhraseQuery *)q)->slop++;
675
+ }
676
+ else {
677
+ frt_phq_append_multi_term(q, token->text);
678
+ }
679
+ } while ((token = frt_ts_next(stream)) != NULL);
680
+ }
681
+ }
682
+ return q;
683
+ }
684
+
685
+ /**
686
+ * Create a FuzzyQuery. The word will be tokenized and only the first token
687
+ * will be used. If there are any more tokens after tokenization, they will be
688
+ * ignored.
689
+ */
690
+ static FrtQuery *get_fuzzy_q(QParser *qp, FrtSymbol field, char *word,
691
+ char *slop_str)
692
+ {
693
+ FrtQuery *q;
694
+ FrtToken *token;
695
+ FrtTokenStream *stream = get_cached_ts(qp, field, word);
696
+
697
+ if ((token = frt_ts_next(stream)) == NULL) {
698
+ q = NULL;
699
+ }
700
+ else {
701
+ /* it only makes sense to find one term in a fuzzy query */
702
+ float slop = qp_default_fuzzy_min_sim;
703
+ if (slop_str) {
704
+ sscanf(slop_str, "%f", &slop);
705
+ }
706
+ q = frt_fuzq_new_conf(field, token->text, slop, qp_default_fuzzy_pre_len,
707
+ qp->max_clauses);
708
+ }
709
+ return q;
710
+ }
711
+
712
+ /**
713
+ * Downcase a string taking locale into account and works for multibyte
714
+ * character sets.
715
+ */
716
+ static char *lower_str(char *str)
717
+ {
718
+ const int max_len = (int)strlen(str) + 1;
719
+ int cnt;
720
+ wchar_t *wstr = FRT_ALLOC_N(wchar_t, max_len);
721
+ if ((cnt = mbstowcs(wstr, str, max_len)) > 0) {
722
+ wchar_t *w = wstr;
723
+ while (*w) {
724
+ *w = towlower(*w);
725
+ w++;
726
+ }
727
+ wcstombs(str, wstr, max_len);
728
+ }
729
+ else {
730
+ char *s = str;
731
+ while (*s) {
732
+ *s = tolower(*s);
733
+ s++;
734
+ }
735
+ }
736
+ free(wstr);
737
+ str[max_len] = '\0';
738
+ return str;
739
+ }
740
+
741
+ /**
742
+ * Create a WildCardQuery. No tokenization will be performed on the pattern
743
+ * but the pattern will be downcased if +qp->wild_lower+ is set to true and
744
+ * the field in question is a tokenized field.
745
+ *
746
+ * Note: this method will not always return a WildCardQuery. It could be
747
+ * optimized to a MatchAllQuery if the pattern is '*' or a PrefixQuery if the
748
+ * only wild char (*, ?) in the pattern is a '*' at the end of the pattern.
749
+ */
750
+ static FrtQuery *get_wild_q(QParser *qp, FrtSymbol field, char *pattern)
751
+ {
752
+ FrtQuery *q;
753
+ bool is_prefix = false;
754
+ char *p;
755
+ int len = (int)strlen(pattern);
756
+
757
+ if (qp->wild_lower
758
+ && (!qp->tokenized_fields || frt_hs_exists(qp->tokenized_fields, field))) {
759
+ lower_str(pattern);
760
+ }
761
+
762
+ /* simplify the wildcard query to a prefix query if possible. Basically a
763
+ * prefix query is any wildcard query that has a '*' as the last character
764
+ * and no other wildcard characters before it. "*" by itself will expand
765
+ * to a MatchAllQuery */
766
+ if (strcmp(pattern, "*") == 0) {
767
+ return frt_maq_new();
768
+ }
769
+ if (pattern[len - 1] == '*') {
770
+ is_prefix = true;
771
+ for (p = &pattern[len - 2]; p >= pattern; p--) {
772
+ if (*p == '*' || *p == '?') {
773
+ is_prefix = false;
774
+ break;
775
+ }
776
+ }
777
+ }
778
+ if (is_prefix) {
779
+ /* chop off the '*' temporarily to create the query */
780
+ pattern[len - 1] = 0;
781
+ q = frt_prefixq_new(field, pattern);
782
+ pattern[len - 1] = '*';
783
+ }
784
+ else {
785
+ q = frt_wcq_new(field, pattern);
786
+ }
787
+ MTQMaxTerms(q) = qp->max_clauses;
788
+ return q;
789
+ }
790
+
791
+ /**
792
+ * Adds another field to the top of the FieldStack.
793
+ */
794
+ static FrtHashSet *add_field(QParser *qp, const char *field_name)
795
+ {
796
+ FrtSymbol field = field_name;
797
+ if (qp->allow_any_fields || frt_hs_exists(qp->all_fields, field)) {
798
+ frt_hs_add(qp->fields, field);
799
+ }
800
+ return qp->fields;
801
+ }
802
+
803
+ /**
804
+ * The method gets called when a field modifier ("field1|field2:") is seen. It
805
+ * will push a new FieldStack object onto the stack and add +field+ to its
806
+ * fields set.
807
+ */
808
+ static FrtHashSet *first_field(QParser *qp, const char *field)
809
+ {
810
+ qp_push_fields(qp, frt_hs_new_ptr(NULL), true);
811
+ return add_field(qp, field);
812
+ }
813
+
814
+ /**
815
+ * Destroy a phrase object freeing all allocated memory.
816
+ */
817
+ static void ph_destroy(Phrase *self)
818
+ {
819
+ int i;
820
+ for (i = 0; i < self->size; i++) {
821
+ frt_ary_destroy(self->positions[i].terms, &free);
822
+ }
823
+ free(self->positions);
824
+ free(self);
825
+ }
826
+
827
+
828
+ /**
829
+ * Allocate a new Phrase object
830
+ */
831
+ static Phrase *ph_new()
832
+ {
833
+ Phrase *self = FRT_ALLOC_AND_ZERO(Phrase);
834
+ self->capa = PHRASE_INIT_CAPA;
835
+ self->positions = FRT_ALLOC_AND_ZERO_N(PhrasePosition, PHRASE_INIT_CAPA);
836
+ return self;
837
+ }
838
+
839
+ /**
840
+ * Add the first word to the phrase. This method is also in charge of
841
+ * allocating a new Phrase object.
842
+ */
843
+ static Phrase *ph_first_word(char *word)
844
+ {
845
+ Phrase *self = ph_new();
846
+ if (word) { /* no point in adding NULL in start */
847
+ self->positions[0].terms = frt_ary_new_type_capa(char *, 1);
848
+ frt_ary_push(self->positions[0].terms, frt_estrdup(word));
849
+ self->size = 1;
850
+ }
851
+ return self;
852
+ }
853
+
854
+ /**
855
+ * Add a new word to the Phrase
856
+ */
857
+ static Phrase *ph_add_word(Phrase *self, char *word)
858
+ {
859
+ if (word) {
860
+ const int index = self->size;
861
+ FrtPhrasePosition *pp = self->positions;
862
+ if (index >= self->capa) {
863
+ self->capa <<= 1;
864
+ FRT_REALLOC_N(pp, PhrasePosition, self->capa);
865
+ self->positions = pp;
866
+ }
867
+ pp[index].pos = self->pos_inc;
868
+ pp[index].terms = frt_ary_new_type_capa(char *, 1);
869
+ frt_ary_push(pp[index].terms, frt_estrdup(word));
870
+ self->size++;
871
+ self->pos_inc = 0;
872
+ }
873
+ else {
874
+ self->pos_inc++;
875
+ }
876
+ return self;
877
+ }
878
+
879
+ /**
880
+ * Adds a word to the Phrase object in the same position as the previous word
881
+ * added to the Phrase. This will later be turned into a multi-PhraseQuery.
882
+ */
883
+ static Phrase *ph_add_multi_word(Phrase *self, char *word)
884
+ {
885
+ const int index = self->size - 1;
886
+ FrtPhrasePosition *pp = self->positions;
887
+
888
+ if (word) {
889
+ frt_ary_push(pp[index].terms, frt_estrdup(word));
890
+ }
891
+ return self;
892
+ }
893
+
894
+ /**
895
+ * Build a phrase query for a single field. It might seem like a better idea
896
+ * to build the PhraseQuery once and duplicate it for each field but this
897
+ * would be buggy in the case of PerFieldAnalyzers in which case a different
898
+ * tokenizer could be used for each field.
899
+ *
900
+ * Note that the query object returned by this method is not always a
901
+ * PhraseQuery. If there is only one term in the query then the query is
902
+ * simplified to a TermQuery. If there are multiple terms but only a single
903
+ * position, then a MultiTermQuery is retured.
904
+ *
905
+ * Note that each word in the query gets tokenized. Unlike get_term_q, if the
906
+ * word gets tokenized into more than one token, the rest of the tokens are
907
+ * ignored. For example, if you have the phrase;
908
+ *
909
+ * "email: dbalmain@gmail.com"
910
+ *
911
+ * the Phrase object will contain to positions with the words 'email:' and
912
+ * 'dbalmain@gmail.com'. Now, if you are using a LetterTokenizer then the
913
+ * second word will be tokenized into the tokens ['dbalmain', 'gmail', 'com']
914
+ * and only the first token will be used, so the resulting phrase query will
915
+ * actually look like this;
916
+ *
917
+ * "email dbalmain"
918
+ *
919
+ * This problem can easily be solved by using the StandardTokenizer or any
920
+ * custom tokenizer which will leave dbalmain@gmail.com as a single token.
921
+ */
922
+ static FrtQuery *get_phrase_query(QParser *qp, FrtSymbol field,
923
+ Phrase *phrase, char *slop_str)
924
+ {
925
+ const int pos_cnt = phrase->size;
926
+ FrtQuery *q = NULL;
927
+
928
+ if (pos_cnt == 1) {
929
+ char **words = phrase->positions[0].terms;
930
+ const int word_count = frt_ary_size(words);
931
+ if (word_count == 1) {
932
+ q = get_term_q(qp, field, words[0]);
933
+ }
934
+ else {
935
+ int i;
936
+ int term_cnt = 0;
937
+ FrtToken *token;
938
+ char *last_word = NULL;
939
+
940
+ for (i = 0; i < word_count; i++) {
941
+ token = frt_ts_next(get_cached_ts(qp, field, words[i]));
942
+ if (token) {
943
+ free(words[i]);
944
+ last_word = words[i] = frt_estrdup(token->text);
945
+ ++term_cnt;
946
+ }
947
+ else {
948
+ /* empty words will later be ignored */
949
+ words[i][0] = '\0';
950
+ }
951
+ }
952
+
953
+ switch (term_cnt) {
954
+ case 0:
955
+ q = frt_bq_new(false);
956
+ break;
957
+ case 1:
958
+ q = frt_tq_new(field, last_word);
959
+ break;
960
+ default:
961
+ q = frt_multi_tq_new_conf(field, term_cnt, 0.0);
962
+ for (i = 0; i < word_count; i++) {
963
+ /* ignore empty words */
964
+ if (words[i][0]) {
965
+ frt_multi_tq_add_term(q, words[i]);
966
+ }
967
+ }
968
+ break;
969
+ }
970
+ }
971
+ }
972
+ else if (pos_cnt > 1) {
973
+ FrtToken *token;
974
+ FrtTokenStream *stream;
975
+ int i, j;
976
+ int pos_inc = 0;
977
+ q = frt_phq_new(field);
978
+ if (slop_str) {
979
+ int slop;
980
+ sscanf(slop_str,"%d",&slop);
981
+ ((FrtPhraseQuery *)q)->slop = slop;
982
+ }
983
+
984
+ for (i = 0; i < pos_cnt; i++) {
985
+ char **words = phrase->positions[i].terms;
986
+ const int word_count = frt_ary_size(words);
987
+ if (pos_inc) {
988
+ ((FrtPhraseQuery *)q)->slop++;
989
+ }
990
+ pos_inc += phrase->positions[i].pos + 1; /* Actually holds pos_inc*/
991
+
992
+ if (word_count == 1) {
993
+ stream = get_cached_ts(qp, field, words[0]);
994
+ while ((token = frt_ts_next(stream))) {
995
+ if (token->pos_inc) {
996
+ frt_phq_add_term(q, token->text,
997
+ pos_inc ? pos_inc : token->pos_inc);
998
+ }
999
+ else {
1000
+ frt_phq_append_multi_term(q, token->text);
1001
+ ((FrtPhraseQuery *)q)->slop++;
1002
+ }
1003
+ pos_inc = 0;
1004
+ }
1005
+ }
1006
+ else {
1007
+ bool added_position = false;
1008
+
1009
+ for (j = 0; j < word_count; j++) {
1010
+ stream = get_cached_ts(qp, field, words[j]);
1011
+ if ((token = frt_ts_next(stream))) {
1012
+ if (!added_position) {
1013
+ frt_phq_add_term(q, token->text,
1014
+ pos_inc ? pos_inc : token->pos_inc);
1015
+ added_position = true;
1016
+ pos_inc = 0;
1017
+ }
1018
+ else {
1019
+ frt_phq_append_multi_term(q, token->text);
1020
+ }
1021
+ }
1022
+ }
1023
+ }
1024
+ }
1025
+ }
1026
+ return q;
1027
+ }
1028
+
1029
+ /**
1030
+ * Build the query for a parsed phrase and then release the Phrase object.
1031
+ *
+ * The Phrase object is an intermediate structure built up by the query
+ * parser. The query itself is constructed by get_phrase_query(), invoked
+ * through the FLDS macro.
1032
+ * Once that is done +phrase+ is destroyed with ph_destroy(), so the caller
+ * must not use +phrase+ again.
+ *
+ * NOTE(review): FLDS is defined outside this excerpt. +field+ is not declared
+ * in this function, so it is presumably supplied by the macro expansion --
+ * confirm against the macro definition.
1033
+ */
1034
+ static FrtQuery *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
1035
+ {
1036
+ FrtQuery *volatile q = NULL;
1037
+ FLDS(q, get_phrase_query(qp, field, phrase, slop_str)); /* result is stored in q */
1038
+ ph_destroy(phrase); /* the Phrase is consumed by this call */
1039
+ return q;
1040
+ }
1041
+
1042
+ /**
1043
+ * Gets a RangeQuery object.
1044
+ *
1045
+ * Just like with WildCardQuery, RangeQuery needs to downcase its terms if the
1046
+ * tokenizer also downcased its terms.
1047
+ */
1048
+ static FrtQuery *get_r_q(QParser *qp, FrtSymbol field, char *from, char *to,
1049
+ bool inc_lower, bool inc_upper)
1050
+ {
1051
+ FrtQuery *rq;
1052
+ if (qp->wild_lower
1053
+ && (!qp->tokenized_fields || frt_hs_exists(qp->tokenized_fields, field))) {
1054
+ if (from) {
1055
+ lower_str(from);
1056
+ }
1057
+ if (to) {
1058
+ lower_str(to);
1059
+ }
1060
+ }
1061
+ /*
1062
+ * terms don't get tokenized as it doesn't really make sense to do so for
1063
+ * range queries.
1064
+
1065
+ if (from) {
1066
+ FrtTokenStream *stream = get_cached_ts(qp, field, from);
1067
+ FrtToken *token = frt_ts_next(stream);
1068
+ from = token ? frt_estrdup(token->text) : NULL;
1069
+ }
1070
+ if (to) {
1071
+ FrtTokenStream *stream = get_cached_ts(qp, field, to);
1072
+ FrtToken *token = frt_ts_next(stream);
1073
+ to = token ? frt_estrdup(token->text) : NULL;
1074
+ }
1075
+ */
1076
+
1077
+ rq = qp->use_typed_range_query ?
1078
+ frt_trq_new(field, from, to, inc_lower, inc_upper) :
1079
+ frt_rq_new(field, from, to, inc_lower, inc_upper);
1080
+ return rq;
1081
+ }
1082
+
1083
+ /**
1084
+ * Every time the query parser sees a new field modifier ("field1|field2:")
1085
+ * it pushes a new FieldStack object onto the stack and sets its fields to the
1086
+ * fields specified in the fields modifier. If the field modifier is '*',
1087
+ * fs->fields is set to all_fields. fs->fields is set to +qp->def_field+ at
1088
+ * the bottom of the stack (ie the very first set of fields pushed onto the
1089
+ * stack).
1090
+ */
1091
+ static void qp_push_fields(QParser *self, FrtHashSet *fields, bool destroy)
1092
+ {
1093
+ FieldStack *fs = FRT_ALLOC(FieldStack);
1094
+
1095
+ fs->next = self->fields_top;
1096
+ fs->fields = fields;
1097
+ fs->destroy = destroy;
1098
+
1099
+ self->fields_top = fs;
1100
+ self->fields = fields;
1101
+ }
1102
+
1103
+ /**
1104
+ * Pops the top of the fields stack and frees any memory used by it. This will
1105
+ * get called when query modified by a field modifier ("field1|field2:") has
1106
+ * been fully parsed and the field specifier no longer applies.
1107
+ */
1108
+ static void qp_pop_fields(QParser *self)
1109
+ {
1110
+ FieldStack *fs = self->fields_top;
1111
+
1112
+ if (fs->destroy) {
1113
+ frt_hs_destroy(fs->fields);
1114
+ }
1115
+ self->fields_top = fs->next;
1116
+ if (self->fields_top) {
1117
+ self->fields = self->fields_top->fields;
1118
+ }
1119
+ free(fs);
1120
+ }
1121
+
1122
+ /**
1123
+ * Free all memory allocated by the QueryParser.
1124
+ */
1125
+ void frt_qp_destroy(QParser *self)
1126
+ {
1127
+ if (self->tokenized_fields != self->all_fields) {
1128
+ frt_hs_destroy(self->tokenized_fields);
1129
+ }
1130
+ if (self->def_fields != self->all_fields) {
1131
+ frt_hs_destroy(self->def_fields);
1132
+ }
1133
+ frt_hs_destroy(self->all_fields);
1134
+
1135
+ qp_pop_fields(self);
1136
+ assert(NULL == self->fields_top);
1137
+
1138
+ frt_h_destroy(self->ts_cache);
1139
+ frt_tk_destroy(self->non_tokenizer);
1140
+ frt_a_deref(self->analyzer);
1141
+ free(self);
1142
+ }
1143
+
1144
+ /**
1145
+ * Creates a new QueryParser setting all boolean parameters to their defaults.
1146
+ * If +def_fields+ is NULL then +all_fields+ is used in place of +def_fields+.
1147
+ * Not also that this method ensures that all fields that exist in
1148
+ * +def_fields+ must also exist in +all_fields+. This should make sense.
1149
+ */
1150
+ QParser *qp_new(FrtAnalyzer *analyzer)
1151
+ {
1152
+ FrtQParser *self = FRT_ALLOC(QParser);
1153
+ self->or_default = true;
1154
+ self->wild_lower = true;
1155
+ self->clean_str = false;
1156
+ self->max_clauses = QP_MAX_CLAUSES;
1157
+ self->handle_parse_errors = false;
1158
+ self->allow_any_fields = false;
1159
+ self->use_keywords = true;
1160
+ self->use_typed_range_query = false;
1161
+ self->def_slop = 0;
1162
+
1163
+ self->tokenized_fields = frt_hs_new_ptr(NULL);
1164
+ self->all_fields = frt_hs_new_ptr(NULL);
1165
+ self->def_fields = frt_hs_new_ptr(NULL);
1166
+
1167
+ self->fields_top = NULL;
1168
+ qp_push_fields(self, self->def_fields, false);
1169
+
1170
+ /* make sure all_fields contains the default fields */
1171
+ self->analyzer = analyzer;
1172
+ self->ts_cache = frt_h_new_ptr((frt_free_ft)&ts_deref);
1173
+ self->buf_index = 0;
1174
+ self->dynbuf = NULL;
1175
+ self->non_tokenizer = non_tokenizer_new();
1176
+ frt_mutex_init(&self->mutex, NULL);
1177
+ return self;
1178
+ }
1179
+
1180
+ void frt_qp_add_field(QParser *self,
1181
+ FrtSymbol field,
1182
+ bool is_default,
1183
+ bool is_tokenized)
1184
+ {
1185
+ frt_hs_add(self->all_fields, field);
1186
+ if (is_default) {
1187
+ frt_hs_add(self->def_fields, field);
1188
+ }
1189
+ if (is_tokenized) {
1190
+ frt_hs_add(self->tokenized_fields, field);
1191
+ }
1192
+ }
1193
+
1194
+ /* These characters have meaning within phrases. qp_clean_str() re-emits the
+  * escaping backslash in front of them when they appear escaped inside quotes. */
1195
+ static const char *PHRASE_CHARS = "<>|\"";
1196
+
1197
/**
 * Insert +chr+ at the front of a buffer by shifting its first +len+ bytes one
 * place to the right. The caller must make sure the buffer has room for
 * +len+ + 1 bytes. No terminator is written.
 */
static void str_insert_char(char *str, int len, char chr)
{
    memmove(&str[1], &str[0], (size_t)len);
    str[0] = chr;
}
1206
+
1207
+ /**
1208
+ * +qp_clean_str+ scans the query string and ensures that all open
1209
+ * and close parentheses '()' and quotes '"' are balanced. It does this by
1210
+ * inserting or appending extra parentheses or quotes to the string. This
1211
+ * obviously won't necessarily be exactly what the user wanted but we are
1212
+ * never going to know that anyway. The main job of this method is to help the
1213
+ * query at least parse correctly.
1214
+ *
1215
+ * It also checks that all special characters within phrases (ie between
1216
+ * quotes) are escaped correctly unless they have meaning within a phrase
1217
+ * ( <>,|," ). Note that '<' and '>' will also be escaped unless they appear
1218
+ * together like so; '<>'.
+ *
+ * +str+ itself is not modified. The cleaned query is returned in a newly
+ * allocated, NUL-terminated string which the caller is responsible for
+ * freeing.
1219
+ */
1220
+ char *qp_clean_str(char *str)
1221
+ {
1222
+ int b, pb = -1; /* b: current character, pb: previous character (-1 before the first) */
1223
+ int br_cnt = 0; /* number of '(' seen outside quotes that are still unclosed */
1224
+ bool quote_open = false; /* true while scanning between an opening '"' and its partner */
1225
+ char *sp, *nsp; /* sp reads from str, nsp writes into new_str */
1226
+
1227
+ /* sized for two output bytes per input byte, plus the terminator */
1228
+ char *new_str = FRT_ALLOC_N(char, strlen(str)*2 + 1);
1229
+
1230
+ for (sp = str, nsp = new_str; *sp; sp++) {
1231
+ b = *sp;
1232
+ /* ignore escaped characters */
1233
+ if (pb == '\\') {
1234
+ if (quote_open && strrchr(PHRASE_CHARS, b)) {
1235
+ *nsp++ = '\\'; /* this was left off the first time through */
1236
+ }
1237
+ *nsp++ = b;
1238
+ /* \ has escaped itself so has no power. Assign pb random char 'r' */
1239
+ pb = ((b == '\\') ? 'r' : b);
1240
+ continue;
1241
+ }
1242
+ switch (b) {
1243
+ case '\\':
1244
+ if (!quote_open) { /* We do our own escaping below */
1245
+ *nsp++ = b;
1246
+ }
1247
+ break;
1248
+ case '"':
1249
+ quote_open = !quote_open;
1250
+ *nsp++ = b;
1251
+ break;
1252
+ case '(':
1253
+ if (!quote_open) {
1254
+ br_cnt++;
1255
+ }
1256
+ else {
1257
+ *nsp++ = '\\'; /* parentheses inside a phrase are escaped */
1258
+ }
1259
+ *nsp++ = b;
1260
+ break;
1261
+ case ')':
1262
+ if (!quote_open) {
1263
+ if (br_cnt == 0) {
1264
+ str_insert_char(new_str, (int)(nsp - new_str), '('); /* unmatched ')': balance it with a '(' at the very start of the output */
1265
+ nsp++;
1266
+ }
1267
+ else {
1268
+ br_cnt--;
1269
+ }
1270
+ }
1271
+ else {
1272
+ *nsp++ = '\\';
1273
+ }
1274
+ *nsp++ = b;
1275
+ break;
1276
+ case '>':
1277
+ if (quote_open) {
1278
+ if (pb == '<') {
1279
+ /* remove the escape character */
1280
+ nsp--;
1281
+ nsp[-1] = '<'; /* the two bytes written for '<' collapse into a bare '<' */
1282
+ }
1283
+ else {
1284
+ *nsp++ = '\\';
1285
+ }
1286
+ }
1287
+ *nsp++ = b;
1288
+ break;
1289
+ default:
1290
+ if (quote_open) {
1291
+ if (strrchr(special_char, b) && b != '|') { /* inside a phrase every special character except '|' gets escaped */
1292
+ *nsp++ = '\\';
1293
+ }
1294
+ }
1295
+ *nsp++ = b;
1296
+ }
1297
+ pb = b;
1298
+ }
1299
+ if (quote_open) {
1300
+ *nsp++ = '"'; /* close a phrase that was left open */
1301
+ }
1302
+ for (;br_cnt > 0; br_cnt--) {
1303
+ *nsp++ = ')'; /* close every parenthesis that was left open */
1304
+ }
1305
+ *nsp = '\0';
1306
+ return new_str;
1307
+ }
1308
+
1309
+ /**
1310
+ * Fallback used when normal parsing has failed. Takes a string and finds
1311
+ * whatever tokens it can by handing the whole string to get_term_q(), invoked
1312
+ * through the FLDS macro. May return NULL if no tokens are found.
+ *
+ * The parser is flagged as +recovering+, and the field stack must hold only
+ * its bottom entry when this is called.
+ *
+ * NOTE(review): FLDS is defined outside this excerpt. +field+ is not declared
+ * in this function, so it is presumably supplied by the macro expansion --
+ * confirm against the macro definition.
1313
+ */
1314
+ static FrtQuery *qp_get_bad_query(QParser *qp, char *str)
1315
+ {
1316
+ FrtQuery *volatile q = NULL;
1317
+ qp->recovering = true;
1318
+ assert(qp->fields_top->next == NULL); /* no field modifier may still be on the stack */
1319
+ FLDS(q, get_term_q(qp, field, str)); /* result is stored in q */
1320
+ return q;
1321
+ }
1322
+
1323
+ /**
1324
+ * +qp_parse+ takes a string and turns it into a Query object using Ferret's
1325
+ * query language. It must either raise an error or return a query object. It
1326
+ * must not return NULL. If the yacc parser fails it will use a very basic
1327
+ * boolean query parser which takes whatever tokens it can find in the query
1328
+ * and terns them into a boolean query on the default fields.
1329
+ */
1330
+ extern VALUE cQueryParseException;
1331
+ Query *qp_parse(QParser *self, char *qstr)
1332
+ {
1333
+ FrtQuery *result = NULL;
1334
+ frt_mutex_lock(&self->mutex);
1335
+ /* if qp->fields_top->next is not NULL we have a left over field-stack
1336
+ * object that was not popped during the last query parse */
1337
+ assert(NULL == self->fields_top->next);
1338
+
1339
+ self->recovering = self->destruct = false;
1340
+ if (self->clean_str) {
1341
+ self->qstrp = self->qstr = frt_qp_clean_str(qstr);
1342
+ }
1343
+ else {
1344
+ self->qstrp = self->qstr = qstr;
1345
+ }
1346
+ self->fields = self->def_fields;
1347
+ self->result = NULL;
1348
+
1349
+ if (0 == yyparse(self)) result = self->result;
1350
+ if (!result && self->handle_parse_errors) {
1351
+ self->destruct = false;
1352
+ result = qp_get_bad_query(self, self->qstr);
1353
+ }
1354
+ if (self->destruct && !self->handle_parse_errors) {
1355
+ rb_raise(cQueryParseException, xmsg_buffer);
1356
+ }
1357
+ if (!result) {
1358
+ result = frt_bq_new(false);
1359
+ }
1360
+ if (self->clean_str) {
1361
+ free(self->qstr);
1362
+ }
1363
+
1364
+ frt_mutex_unlock(&self->mutex);
1365
+ return result;
1366
+ }