ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295):
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_parser.c CHANGED
@@ -80,19 +80,28 @@
80
80
 
81
81
 
82
82
  /* Copy the first part of user declarations. */
83
- #line 1 "src/query_parser/q_parser.y"
83
+ #line 1 "src/q_parser.y"
84
84
 
85
85
  #include <string.h>
86
+ #include <ctype.h>
87
+ #include <wctype.h>
86
88
  #include "search.h"
89
+ #include "array.h"
87
90
 
88
91
  typedef struct Phrase {
89
- int cnt;
90
- int capa;
91
- char ***words;
92
- int *w_cnt;
93
- int *w_capa;
92
+ int size;
93
+ int capa;
94
+ int pos_inc;
95
+ PhrasePosition *positions;
94
96
  } Phrase;
95
97
 
98
+ #define BCA_INIT_CAPA 4
99
+ typedef struct BCArray {
100
+ int size;
101
+ int capa;
102
+ BooleanClause **clauses;
103
+ } BCArray;
104
+
96
105
 
97
106
 
98
107
  /* Enabling traces. */
@@ -114,17 +123,17 @@ typedef struct Phrase {
114
123
  #endif
115
124
 
116
125
  #if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
117
- #line 14 "src/query_parser/q_parser.y"
126
+ #line 23 "src/q_parser.y"
118
127
  typedef union YYSTYPE {
119
- Query *query;
120
- BooleanClause *bcls;
121
- Array *array;
122
- HashSet *hashset;
123
- Phrase *phrase;
124
- char *str;
128
+ Query *query;
129
+ BooleanClause *bcls;
130
+ BCArray *bclss;
131
+ HashSet *hashset;
132
+ Phrase *phrase;
133
+ char *str;
125
134
  } YYSTYPE;
126
135
  /* Line 196 of yacc.c. */
127
- #line 128 "y.tab.c"
136
+ #line 137 "y.tab.c"
128
137
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
129
138
  # define YYSTYPE_IS_DECLARED 1
130
139
  # define YYSTYPE_IS_TRIVIAL 1
@@ -133,62 +142,58 @@ typedef union YYSTYPE {
133
142
 
134
143
 
135
144
  /* Copy the second part of user declarations. */
136
- #line 22 "src/query_parser/q_parser.y"
145
+ #line 31 "src/q_parser.y"
137
146
 
138
- int yylex(YYSTYPE *lvalp, QParser *qp);
139
- int yyerror(QParser *qp, char const *msg);
147
+ static int yylex(YYSTYPE *lvalp, QParser *qp);
148
+ static int yyerror(QParser *qp, char const *msg);
140
149
 
141
150
  #define PHRASE_INIT_CAPA 4
142
- Query *get_bool_q(Array *bclauses);
151
+ static Query *get_bool_q(BCArray *bca);
143
152
 
144
- Array *first_cls(BooleanClause *cls);
145
- Array *add_and_cls(Array *clauses, BooleanClause *cls);
146
- Array *add_or_cls(Array *clauses, BooleanClause *cls);
147
- Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *cls);
153
+ static BCArray *first_cls(BooleanClause *boolean_clause);
154
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
155
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
156
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca, BooleanClause *clause);
148
157
 
149
- BooleanClause *get_bool_cls(Query *q, unsigned int occur);
158
+ static BooleanClause *get_bool_cls(Query *q, unsigned int occur);
150
159
 
151
- Query *get_term_q(QParser *qp, char *field, char *word);
152
- Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
153
- Query *get_wild_q(QParser *qp, char *field, char *pattern);
160
+ static Query *get_term_q(QParser *qp, char *field, char *word);
161
+ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
162
+ static Query *get_wild_q(QParser *qp, char *field, char *pattern);
154
163
 
155
- HashSet *first_field(QParser *qp, char *field);
156
- HashSet *add_field(QParser *qp, char *field);
164
+ static HashSet *first_field(QParser *qp, char *field);
165
+ static HashSet *add_field(QParser *qp, char *field);
157
166
 
158
- Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
167
+ static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
159
168
 
160
- Phrase *ph_first_word(char *word);
161
- Phrase *ph_add_word(Phrase *self, char *word);
162
- Phrase *ph_add_multi_word(Phrase *self, char *word);
169
+ static Phrase *ph_first_word(char *word);
170
+ static Phrase *ph_add_word(Phrase *self, char *word);
171
+ static Phrase *ph_add_multi_word(Phrase *self, char *word);
163
172
 
164
- Query *get_range_q(char *field, char *from, char *to,
165
- bool inc_lower, bool inc_upper);
173
+ static Query *get_range_q(const char *field, const char *from, const char *to,
174
+ bool inc_lower, bool inc_upper);
166
175
 
167
176
  #define FLDS(q, func) do {\
168
- char *field;\
169
- if (qp->fields->size == 0) {\
170
- q = NULL;\
171
- } else if (qp->fields->size == 1) {\
172
- field = (char *)qp->fields->elems[0];\
173
- q = func;\
174
- } else {\
175
- int i;Query *sq;\
176
- q = bq_create(false);\
177
- for (i = 0; i < qp->fields->size; i++) {\
178
- field = (char *)qp->fields->elems[i];\
179
- sq = func;\
180
- if (sq) bq_add_query(q, sq, BC_SHOULD);\
181
- }\
182
- if (((BooleanQuery *)q->data)->clause_cnt == 0) {\
183
- q_deref(q);\
184
- q = NULL;\
177
+ char *field;\
178
+ if (qp->fields->size == 0) {\
179
+ q = NULL;\
180
+ } else if (qp->fields->size == 1) {\
181
+ field = (char *)qp->fields->elems[0];\
182
+ q = func;\
183
+ } else {\
184
+ int i;Query *sq;\
185
+ q = bq_new(false);\
186
+ for (i = 0; i < qp->fields->size; i++) {\
187
+ field = (char *)qp->fields->elems[i];\
188
+ sq = func;\
189
+ if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
190
+ }\
185
191
  }\
186
- }\
187
192
  } while (0)
188
193
 
189
194
 
190
195
  /* Line 219 of yacc.c. */
191
- #line 192 "y.tab.c"
196
+ #line 197 "y.tab.c"
192
197
 
193
198
  #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
194
199
  # define YYSIZE_T __SIZE_TYPE__
@@ -427,12 +432,12 @@ static const yysigned_char yyrhs[] =
427
432
  /* YYRLINE[YYN] -- source line where rule number YYN was defined. */
428
433
  static const unsigned char yyrline[] =
429
434
  {
430
- 0, 90, 90, 91, 93, 94, 95, 96, 98, 99,
431
- 100, 102, 103, 105, 106, 107, 108, 109, 110, 112,
432
- 113, 114, 116, 118, 118, 120, 120, 120, 123, 124,
433
- 126, 127, 128, 129, 131, 132, 133, 134, 135, 137,
434
- 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
435
- 148
435
+ 0, 95, 95, 96, 98, 99, 100, 101, 103, 104,
436
+ 105, 107, 108, 110, 111, 112, 113, 114, 115, 117,
437
+ 118, 119, 121, 123, 123, 125, 125, 125, 128, 129,
438
+ 131, 132, 133, 134, 136, 137, 138, 139, 140, 142,
439
+ 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
440
+ 153
436
441
  };
437
442
  #endif
438
443
 
@@ -1240,217 +1245,217 @@ yyreduce:
1240
1245
  switch (yyn)
1241
1246
  {
1242
1247
  case 2:
1243
- #line 90 "src/query_parser/q_parser.y"
1248
+ #line 95 "src/q_parser.y"
1244
1249
  { qp->result = (yyval.query) = NULL; }
1245
1250
  break;
1246
1251
 
1247
1252
  case 3:
1248
- #line 91 "src/query_parser/q_parser.y"
1249
- { qp->result = (yyval.query) = get_bool_q((yyvsp[0].array)); }
1253
+ #line 96 "src/q_parser.y"
1254
+ { qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
1250
1255
  break;
1251
1256
 
1252
1257
  case 4:
1253
- #line 93 "src/query_parser/q_parser.y"
1254
- { (yyval.array) = first_cls((yyvsp[0].bcls)); }
1258
+ #line 98 "src/q_parser.y"
1259
+ { (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
1255
1260
  break;
1256
1261
 
1257
1262
  case 5:
1258
- #line 94 "src/query_parser/q_parser.y"
1259
- { (yyval.array) = add_and_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1263
+ #line 99 "src/q_parser.y"
1264
+ { (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
1260
1265
  break;
1261
1266
 
1262
1267
  case 6:
1263
- #line 95 "src/query_parser/q_parser.y"
1264
- { (yyval.array) = add_or_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1268
+ #line 100 "src/q_parser.y"
1269
+ { (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
1265
1270
  break;
1266
1271
 
1267
1272
  case 7:
1268
- #line 96 "src/query_parser/q_parser.y"
1269
- { (yyval.array) = add_default_cls(qp, (yyvsp[-1].array), (yyvsp[0].bcls)); }
1273
+ #line 101 "src/q_parser.y"
1274
+ { (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
1270
1275
  break;
1271
1276
 
1272
1277
  case 8:
1273
- #line 98 "src/query_parser/q_parser.y"
1278
+ #line 103 "src/q_parser.y"
1274
1279
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
1275
1280
  break;
1276
1281
 
1277
1282
  case 9:
1278
- #line 99 "src/query_parser/q_parser.y"
1283
+ #line 104 "src/q_parser.y"
1279
1284
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
1280
1285
  break;
1281
1286
 
1282
1287
  case 10:
1283
- #line 100 "src/query_parser/q_parser.y"
1288
+ #line 105 "src/q_parser.y"
1284
1289
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
1285
1290
  break;
1286
1291
 
1287
1292
  case 12:
1288
- #line 103 "src/query_parser/q_parser.y"
1293
+ #line 108 "src/q_parser.y"
1289
1294
  { if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
1290
1295
  break;
1291
1296
 
1292
1297
  case 14:
1293
- #line 106 "src/query_parser/q_parser.y"
1294
- { (yyval.query) = get_bool_q((yyvsp[-1].array)); }
1298
+ #line 111 "src/q_parser.y"
1299
+ { (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
1295
1300
  break;
1296
1301
 
1297
1302
  case 19:
1298
- #line 112 "src/query_parser/q_parser.y"
1303
+ #line 117 "src/q_parser.y"
1299
1304
  { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
1300
1305
  break;
1301
1306
 
1302
1307
  case 20:
1303
- #line 113 "src/query_parser/q_parser.y"
1308
+ #line 118 "src/q_parser.y"
1304
1309
  { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
1305
1310
  break;
1306
1311
 
1307
1312
  case 21:
1308
- #line 114 "src/query_parser/q_parser.y"
1313
+ #line 119 "src/q_parser.y"
1309
1314
  { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
1310
1315
  break;
1311
1316
 
1312
1317
  case 22:
1313
- #line 116 "src/query_parser/q_parser.y"
1318
+ #line 121 "src/q_parser.y"
1314
1319
  { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
1315
1320
  break;
1316
1321
 
1317
1322
  case 23:
1318
- #line 118 "src/query_parser/q_parser.y"
1323
+ #line 123 "src/q_parser.y"
1319
1324
  { qp->fields = qp->def_fields; }
1320
1325
  break;
1321
1326
 
1322
1327
  case 24:
1323
- #line 119 "src/query_parser/q_parser.y"
1328
+ #line 124 "src/q_parser.y"
1324
1329
  { (yyval.query) = (yyvsp[-1].query); }
1325
1330
  break;
1326
1331
 
1327
1332
  case 25:
1328
- #line 120 "src/query_parser/q_parser.y"
1333
+ #line 125 "src/q_parser.y"
1329
1334
  { qp->fields = qp->all_fields; }
1330
1335
  break;
1331
1336
 
1332
1337
  case 26:
1333
- #line 120 "src/query_parser/q_parser.y"
1338
+ #line 125 "src/q_parser.y"
1334
1339
  {qp->fields = qp->def_fields;}
1335
1340
  break;
1336
1341
 
1337
1342
  case 27:
1338
- #line 121 "src/query_parser/q_parser.y"
1343
+ #line 126 "src/q_parser.y"
1339
1344
  { (yyval.query) = (yyvsp[-1].query); }
1340
1345
  break;
1341
1346
 
1342
1347
  case 28:
1343
- #line 123 "src/query_parser/q_parser.y"
1348
+ #line 128 "src/q_parser.y"
1344
1349
  { (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
1345
1350
  break;
1346
1351
 
1347
1352
  case 29:
1348
- #line 124 "src/query_parser/q_parser.y"
1353
+ #line 129 "src/q_parser.y"
1349
1354
  { (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
1350
1355
  break;
1351
1356
 
1352
1357
  case 30:
1353
- #line 126 "src/query_parser/q_parser.y"
1358
+ #line 131 "src/q_parser.y"
1354
1359
  { (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
1355
1360
  break;
1356
1361
 
1357
1362
  case 31:
1358
- #line 127 "src/query_parser/q_parser.y"
1363
+ #line 132 "src/q_parser.y"
1359
1364
  { (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
1360
1365
  break;
1361
1366
 
1362
1367
  case 32:
1363
- #line 128 "src/query_parser/q_parser.y"
1368
+ #line 133 "src/q_parser.y"
1364
1369
  { (yyval.query) = NULL; }
1365
1370
  break;
1366
1371
 
1367
1372
  case 33:
1368
- #line 129 "src/query_parser/q_parser.y"
1373
+ #line 134 "src/q_parser.y"
1369
1374
  { (yyval.query) = NULL; }
1370
1375
  break;
1371
1376
 
1372
1377
  case 34:
1373
- #line 131 "src/query_parser/q_parser.y"
1378
+ #line 136 "src/q_parser.y"
1374
1379
  { (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
1375
1380
  break;
1376
1381
 
1377
1382
  case 35:
1378
- #line 132 "src/query_parser/q_parser.y"
1383
+ #line 137 "src/q_parser.y"
1379
1384
  { (yyval.phrase) = ph_first_word(NULL); }
1380
1385
  break;
1381
1386
 
1382
1387
  case 36:
1383
- #line 133 "src/query_parser/q_parser.y"
1388
+ #line 138 "src/q_parser.y"
1384
1389
  { (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
1385
1390
  break;
1386
1391
 
1387
1392
  case 37:
1388
- #line 134 "src/query_parser/q_parser.y"
1393
+ #line 139 "src/q_parser.y"
1389
1394
  { (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
1390
1395
  break;
1391
1396
 
1392
1397
  case 38:
1393
- #line 135 "src/query_parser/q_parser.y"
1398
+ #line 140 "src/q_parser.y"
1394
1399
  { (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
1395
1400
  break;
1396
1401
 
1397
1402
  case 39:
1398
- #line 137 "src/query_parser/q_parser.y"
1403
+ #line 142 "src/q_parser.y"
1399
1404
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
1400
1405
  break;
1401
1406
 
1402
1407
  case 40:
1403
- #line 138 "src/query_parser/q_parser.y"
1408
+ #line 143 "src/q_parser.y"
1404
1409
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
1405
1410
  break;
1406
1411
 
1407
1412
  case 41:
1408
- #line 139 "src/query_parser/q_parser.y"
1413
+ #line 144 "src/q_parser.y"
1409
1414
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
1410
1415
  break;
1411
1416
 
1412
1417
  case 42:
1413
- #line 140 "src/query_parser/q_parser.y"
1418
+ #line 145 "src/q_parser.y"
1414
1419
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
1415
1420
  break;
1416
1421
 
1417
1422
  case 43:
1418
- #line 141 "src/query_parser/q_parser.y"
1423
+ #line 146 "src/q_parser.y"
1419
1424
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
1420
1425
  break;
1421
1426
 
1422
1427
  case 44:
1423
- #line 142 "src/query_parser/q_parser.y"
1428
+ #line 147 "src/q_parser.y"
1424
1429
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
1425
1430
  break;
1426
1431
 
1427
1432
  case 45:
1428
- #line 143 "src/query_parser/q_parser.y"
1433
+ #line 148 "src/q_parser.y"
1429
1434
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
1430
1435
  break;
1431
1436
 
1432
1437
  case 46:
1433
- #line 144 "src/query_parser/q_parser.y"
1438
+ #line 149 "src/q_parser.y"
1434
1439
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
1435
1440
  break;
1436
1441
 
1437
1442
  case 47:
1438
- #line 145 "src/query_parser/q_parser.y"
1443
+ #line 150 "src/q_parser.y"
1439
1444
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
1440
1445
  break;
1441
1446
 
1442
1447
  case 48:
1443
- #line 146 "src/query_parser/q_parser.y"
1448
+ #line 151 "src/q_parser.y"
1444
1449
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
1445
1450
  break;
1446
1451
 
1447
1452
  case 49:
1448
- #line 147 "src/query_parser/q_parser.y"
1453
+ #line 152 "src/q_parser.y"
1449
1454
  { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
1450
1455
  break;
1451
1456
 
1452
1457
  case 50:
1453
- #line 148 "src/query_parser/q_parser.y"
1458
+ #line 153 "src/q_parser.y"
1454
1459
  { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
1455
1460
  break;
1456
1461
 
@@ -1459,7 +1464,7 @@ yyreduce:
1459
1464
  }
1460
1465
 
1461
1466
  /* Line 1126 of yacc.c. */
1462
- #line 1463 "y.tab.c"
1467
+ #line 1468 "y.tab.c"
1463
1468
 
1464
1469
  yyvsp -= yylen;
1465
1470
  yyssp -= yylen;
@@ -1727,613 +1732,682 @@ yyreturn:
1727
1732
  }
1728
1733
 
1729
1734
 
1730
- #line 150 "src/query_parser/q_parser.y"
1735
+ #line 155 "src/q_parser.y"
1731
1736
 
1732
1737
 
1733
1738
  const char *special_char = "&:()[]{}!+\"~^-|<>=*?";
1734
1739
  const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
1735
1740
 
1736
- int get_word(YYSTYPE *lvalp, QParser *qp)
1741
+ static int get_word(YYSTYPE *lvalp, QParser *qp)
1737
1742
  {
1738
- bool is_wild = false;
1739
- int len;
1740
- char c;
1741
- char *buf = qp->buf[qp->buf_index];
1742
- char *bufp = buf;
1743
- qp->buf_index = (qp->buf_index + 1) % CONC_WORDS;
1744
-
1745
- qp->qstrp--; /* need to back up one character */
1746
-
1747
- while (!strchr(not_word, (c=*qp->qstrp++))) {
1748
- switch (c) {
1749
- case '\\':
1750
- if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1751
- *bufp++ = '\\';
1752
- } else {
1753
- *bufp++ = c;
1754
- qp->qstrp++;
1743
+ bool is_wild = false;
1744
+ int len;
1745
+ char c;
1746
+ char *buf = qp->buf[qp->buf_index];
1747
+ char *bufp = buf;
1748
+ qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
1749
+
1750
+ qp->qstrp--; /* need to back up one character */
1751
+
1752
+ while (!strchr(not_word, (c=*qp->qstrp++))) {
1753
+ switch (c) {
1754
+ case '\\':
1755
+ if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1756
+ *bufp++ = '\\';
1757
+ }
1758
+ else {
1759
+ *bufp++ = c;
1760
+ qp->qstrp++;
1761
+ }
1762
+ break;
1763
+ case '*': case '?':
1764
+ is_wild = true;
1765
+ /* fall through */
1766
+ default:
1767
+ *bufp++ = c;
1755
1768
  }
1756
- break;
1757
- case '*': case '?':
1758
- is_wild = true;
1759
- default:
1760
- *bufp++ = c;
1761
1769
  }
1762
- }
1763
- qp->qstrp--;
1764
- /* check for keywords. There are only four so we have a bit of a hack which
1765
- * just checks for all of them. */
1766
- *bufp = '\0';
1767
- len = (int)(bufp - buf);
1768
- if (len == 3) {
1769
- if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1770
- if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1771
- if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1772
- }
1773
- if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1774
-
1775
- /* found a word so return it. */
1776
- lvalp->str = buf;
1777
- if (is_wild) return WILD_STR;
1778
- return WORD;
1770
+ qp->qstrp--;
1771
+ /* check for keywords. There are only four so we have a bit of a hack which
1772
+ * just checks for all of them. */
1773
+ *bufp = '\0';
1774
+ len = (int)(bufp - buf);
1775
+ if (len == 3) {
1776
+ if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1777
+ if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1778
+ if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1779
+ }
1780
+ if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1781
+
1782
+ /* found a word so return it. */
1783
+ lvalp->str = buf;
1784
+ if (is_wild) return WILD_STR;
1785
+ return WORD;
1779
1786
  }
1780
1787
 
1781
- int yylex(YYSTYPE *lvalp, QParser *qp)
1788
+ static int yylex(YYSTYPE *lvalp, QParser *qp)
1782
1789
  {
1783
- char c, nc;
1790
+ char c, nc;
1784
1791
 
1785
- while ((c=*qp->qstrp++) == ' ' || c == '\t')
1786
- ;
1787
- if (c == '\0')
1788
- return 0;
1792
+ while ((c=*qp->qstrp++) == ' ' || c == '\t') {
1793
+ }
1789
1794
 
1790
- if (strchr(special_char, c)) { /* comment */
1791
- nc = *qp->qstrp;
1792
- switch (c) {
1793
- case '-': case '!': return NOT;
1794
- case '+': return REQ;
1795
- case '*':
1796
- if (nc == ':') return c;
1797
- break;
1798
- case '&':
1799
- if (nc == '&') {
1800
- qp->qstrp++;
1801
- return AND;
1795
+ if (c == '\0') return 0;
1796
+
1797
+ if (strchr(special_char, c)) { /* comment */
1798
+ nc = *qp->qstrp;
1799
+ switch (c) {
1800
+ case '-': case '!': return NOT;
1801
+ case '+': return REQ;
1802
+ case '*':
1803
+ if (nc == ':') return c;
1804
+ break;
1805
+ case '&':
1806
+ if (nc == '&') {
1807
+ qp->qstrp++;
1808
+ return AND;
1809
+ }
1810
+ break; /* Don't return single & character. Use in word. */
1811
+ case '|':
1812
+ if (nc == '|') {
1813
+ qp->qstrp++;
1814
+ return OR;
1815
+ }
1816
+ default:
1817
+ return c;
1802
1818
  }
1803
- break; /* Don't return single & character. Use in word. */
1804
- case '|':
1805
- if (nc == '|') {
1806
- qp->qstrp++;
1807
- return OR;
1808
- }
1809
- default:
1810
- return c;
1811
1819
  }
1812
- }
1813
1820
 
1814
- return get_word(lvalp, qp);
1821
+ return get_word(lvalp, qp);
1815
1822
  }
1816
1823
 
1817
- int yyerror(QParser *qp, char const *msg)
1824
+ static int yyerror(QParser *qp, char const *msg)
1818
1825
  {
1819
- if (!qp->handle_parse_errors) {
1820
- if (qp->clean_str) free(qp->qstr);
1821
- RAISE(PARSE_ERROR, (char *)msg);
1822
- }
1823
- return 0;
1826
+ if (!qp->handle_parse_errors) {
1827
+ char buf[1024];
1828
+ buf[1023] = '\0';
1829
+ strncpy(buf, qp->qstr, 1023);
1830
+ if (qp->clean_str) {
1831
+ free(qp->qstr);
1832
+ }
1833
+ mutex_unlock(&qp->mutex);
1834
+ RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
1835
+ " was %se", buf, (char *)msg);
1836
+ }
1837
+ return 0;
1824
1838
  }
1825
1839
 
1840
+ #define BQ(query) ((BooleanQuery *)(query))
1826
1841
 
1827
- Query *get_bool_q(Array *bclauses)
1842
+ static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
1828
1843
  {
1829
- Query *q;
1830
- BooleanQuery *bq;
1831
- BooleanClause *bc;
1832
-
1833
- if (bclauses->size == 0) {
1834
- ary_destroy(bclauses);
1835
- q = NULL;
1836
- } else if (bclauses->size == 1) {
1837
- bc = (BooleanClause *)bclauses->elems[0];
1838
- q = bc->query;
1839
- free(bc);
1840
- ary_destroy(bclauses);
1841
- } else {
1842
- q = bq_create(false);
1843
- /* copy clauses into query */
1844
- bq = (BooleanQuery *)q->data;
1845
- bq->clause_cnt = bclauses->size;
1846
- bq->clause_capa = bclauses->allocated;
1847
- free(bq->clauses);
1848
- bq->clauses = (BooleanClause **)bclauses->elems;
1849
- free(bclauses);
1850
- }
1851
- return q;
1844
+ TokenStream *ts = h_get(qp->ts_cache, field);
1845
+ if (!ts) {
1846
+ ts = a_get_ts(qp->analyzer, field, text);
1847
+ h_set(qp->ts_cache, estrdup(field), ts);
1848
+ }
1849
+ else {
1850
+ ts->reset(ts, text);
1851
+ }
1852
+ return ts;
1852
1853
  }
1853
1854
 
1854
-
1855
- Array *first_cls(BooleanClause *cls)
1855
+ static char *get_cached_field(HashTable *field_cache, const char *field)
1856
1856
  {
1857
- Array *clauses = ary_create(0, NULL);
1858
- if (cls) ary_append(clauses, cls);
1859
- return clauses;
1857
+ char *cached_field = h_get(field_cache, field);
1858
+ if (!cached_field) {
1859
+ cached_field = estrdup(field);
1860
+ h_set(field_cache, cached_field, cached_field);
1861
+ }
1862
+ return cached_field;
1860
1863
  }
1861
1864
 
1862
- Array *add_and_cls(Array *clauses, BooleanClause *clause)
1865
+ static Query *get_bool_q(BCArray *bca)
1863
1866
  {
1864
- if (clause) {
1865
- BooleanClause *last_cl;
1866
- if (clauses->size == 1) {
1867
- last_cl = clauses->elems[0];
1868
- if (!last_cl->is_prohibited) bc_set_occur(last_cl, BC_MUST);
1869
- }
1867
+ Query *q;
1868
+ const int clause_count = bca->size;
1870
1869
 
1871
- if (!clause->is_prohibited) bc_set_occur(clause, BC_MUST);
1872
- ary_append(clauses, clause);
1873
- }
1874
- return clauses;
1870
+ if (clause_count == 0) {
1871
+ q = NULL;
1872
+ free(bca->clauses);
1873
+ }
1874
+ else if (clause_count == 1) {
1875
+ BooleanClause *bc = bca->clauses[0];
1876
+ q = bc->query;
1877
+ free(bc);
1878
+ free(bca->clauses);
1879
+ }
1880
+ else {
1881
+ q = bq_new(false);
1882
+ /* copy clauses into query */
1883
+
1884
+ BQ(q)->clause_cnt = clause_count;
1885
+ BQ(q)->clause_capa = bca->capa;
1886
+ free(BQ(q)->clauses);
1887
+ BQ(q)->clauses = bca->clauses;
1888
+ }
1889
+ free(bca);
1890
+ return q;
1875
1891
  }
1876
1892
 
1877
- Array *add_or_cls(Array *clauses, BooleanClause *clause)
1893
+ static void bca_add_clause(BCArray *bca, BooleanClause *clause)
1878
1894
  {
1879
- if (clause) ary_append(clauses, clause);
1880
- return clauses;
1895
+ if (bca->size >= bca->capa) {
1896
+ bca->capa <<= 1;
1897
+ REALLOC_N(bca->clauses, BooleanClause *, bca->capa);
1898
+ }
1899
+ bca->clauses[bca->size] = clause;
1900
+ bca->size++;
1881
1901
  }
1882
1902
 
1883
- Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *clause)
1903
+ static BCArray *first_cls(BooleanClause *clause)
1884
1904
  {
1885
- if (qp->or_default) {
1886
- add_or_cls(clauses, clause);
1887
- } else {
1888
- add_and_cls(clauses, clause);
1889
- }
1890
- return clauses;
1905
+ BCArray *bca = ALLOC_AND_ZERO(BCArray);
1906
+ bca->capa = BCA_INIT_CAPA;
1907
+ bca->clauses = ALLOC_N(BooleanClause *, BCA_INIT_CAPA);
1908
+ if (clause) {
1909
+ bca_add_clause(bca, clause);
1910
+ }
1911
+ return bca;
1891
1912
  }
1892
1913
 
1893
- BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1914
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
1894
1915
  {
1895
- if (q) return bc_create(q, occur);
1896
- else return NULL;
1916
+ if (clause) {
1917
+ if (bca->size == 1) {
1918
+ if (!bca->clauses[0]->is_prohibited) {
1919
+ bc_set_occur(bca->clauses[0], BC_MUST);
1920
+ }
1921
+ }
1922
+ if (!clause->is_prohibited) {
1923
+ bc_set_occur(clause, BC_MUST);
1924
+ }
1925
+ bca_add_clause(bca, clause);
1926
+ }
1927
+ return bca;
1897
1928
  }
1898
1929
 
1899
- Query *get_term_q(QParser *qp, char *field, char *word)
1930
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
1900
1931
  {
1901
- Query *q;
1902
- Token *token;
1903
- TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1904
-
1905
- if ((token = ts_next(stream)) == NULL) {
1906
- q = NULL;
1907
- } else {
1908
- Term *term = term_create(field, token->text);
1909
- if ((token = ts_next(stream)) == NULL) {
1910
- q = tq_create(term);
1911
- } else {
1912
- q = phq_create();
1913
- phq_add_term(q, term, 0);
1914
- do {
1915
- phq_add_term(q, term_create(field, token->text), token->pos_inc);
1916
- } while ((token = ts_next(stream)) != NULL);
1932
+ if (clause) {
1933
+ bca_add_clause(bca, clause);
1917
1934
  }
1918
- }
1919
- return q;
1935
+ return bca;
1920
1936
  }
1921
1937
 
1922
- Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1938
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
1939
+ BooleanClause *clause)
1923
1940
  {
1924
- Query *q;
1925
- Token *token;
1926
- TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1927
-
1928
- if ((token = ts_next(stream)) == NULL) {
1929
- q = NULL;
1930
- } else {
1931
- /* it only makes sense to find one term in a fuzzy query */
1932
- Term *term = term_create(field, token->text);
1933
- if (slop_str) {
1934
- float slop;
1935
- sscanf(slop_str, "%f", &slop);
1936
- q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
1937
- } else {
1938
- q = fuzq_create(term);
1941
+ if (qp->or_default) {
1942
+ add_or_cls(bca, clause);
1943
+ }
1944
+ else {
1945
+ add_and_cls(bca, clause);
1939
1946
  }
1940
- }
1941
- return q;
1947
+ return bca;
1942
1948
  }
1943
1949
 
1944
- Query *get_wild_q(QParser *qp, char *field, char *pattern)
1950
+ static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1945
1951
  {
1946
- Query *q;
1947
- bool is_prefix = false;
1948
- char *p;
1949
- int len = (int)strlen(pattern);
1950
-
1951
- if (qp->wild_lower) lower_str(pattern);
1952
-
1953
- /* simplify the wildcard query to a prefix query if possible. Basically a
1954
- * prefix query is any wildcard query that has a '*' as the last character
1955
- * and no other wildcard characters before it. */
1956
- if (pattern[len-1] == '*') {
1957
- is_prefix = true;
1958
- for (p = &pattern[len-2]; p >= pattern; p--) {
1959
- if (*p == '*' || *p == '?') {
1960
- is_prefix = false;
1961
- break;
1962
- }
1952
+ if (q) {
1953
+ return bc_new(q, occur);
1954
+ }
1955
+ else {
1956
+ return NULL;
1963
1957
  }
1964
- }
1965
-
1966
- if (is_prefix) {
1967
- /* chop off the '*' temporarily to create the query */
1968
- pattern[len-1] = 0;
1969
- q = prefixq_create(term_create(field, pattern));;
1970
- pattern[len-1] = '*';
1971
- } else {
1972
- q = wcq_create(term_create(field, pattern));;
1973
- }
1974
- return q;
1975
1958
  }
1976
1959
 
1977
- HashSet *add_field(QParser *qp, char *field)
1960
+ static Query *get_term_q(QParser *qp, char *field, char *word)
1978
1961
  {
1979
- char *orig_field;
1980
- if ((orig_field = hs_orig(qp->all_fields, field)) != NULL) {
1981
- hs_add(qp->fields, orig_field);
1982
- } else if (qp->allow_any_fields) {
1983
- field = estrdup(field);
1984
- hs_add(qp->all_fields, field);
1985
- hs_add(qp->fields, field);
1986
- }
1987
- return qp->fields;
1962
+ Query *q;
1963
+ Token *token;
1964
+ TokenStream *stream = get_cached_ts(qp, field, word);
1965
+
1966
+ if ((token = ts_next(stream)) == NULL) {
1967
+ q = NULL;
1968
+ }
1969
+ else {
1970
+ q = tq_new(field, token->text);
1971
+ if ((token = ts_next(stream)) != NULL) {
1972
+ /* Less likely case, destroy the term query and create a
1973
+ * phrase query instead */
1974
+ Query *phq = phq_new(field);
1975
+ phq_add_term(phq, ((TermQuery *)q)->term, 0);
1976
+ q->destroy_i(q);
1977
+ q = phq;
1978
+ do {
1979
+ phq_add_term(q, token->text, token->pos_inc);
1980
+ } while ((token = ts_next(stream)) != NULL);
1981
+ }
1982
+ }
1983
+ return q;
1988
1984
  }
1989
1985
 
1990
- HashSet *first_field(QParser *qp, char *field)
1986
+ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1991
1987
  {
1992
- qp->fields = qp->fields_buf;
1993
- qp->fields->size = 0;
1994
- h_clear(qp->fields->ht);
1995
- return add_field(qp, field);
1988
+ Query *q;
1989
+ Token *token;
1990
+ TokenStream *stream = get_cached_ts(qp, field, word);
1991
+
1992
+ if ((token = ts_next(stream)) == NULL) {
1993
+ q = NULL;
1994
+ }
1995
+ else {
1996
+ /* it only makes sense to find one term in a fuzzy query */
1997
+ float slop = DEF_MIN_SIM;
1998
+ if (slop_str) {
1999
+ sscanf(slop_str, "%f", &slop);
2000
+ }
2001
+ q = fuzq_new_conf(field, token->text, slop, DEF_PRE_LEN,
2002
+ qp->max_clauses);
2003
+ }
2004
+ return q;
1996
2005
  }
1997
2006
 
1998
- void ph_destroy(Phrase *self)
2007
+ static char *lower_str(char *str)
1999
2008
  {
2000
- int i, j;
2001
- for (i = 0; i < self->cnt; i++) {
2002
- for (j = 0; j < self->w_cnt[i]; j++) {
2003
- free(self->words[i][j]);
2009
+ const int max_len = (int)strlen(str) + 1;
2010
+ int cnt;
2011
+ wchar_t *wstr = ALLOC_N(wchar_t, max_len);
2012
+ if ((cnt = mbstowcs(wstr, str, max_len)) > 0) {
2013
+ wchar_t *w = wstr;
2014
+ while (*w) {
2015
+ *w = towlower(*w);
2016
+ w++;
2017
+ }
2018
+ wcstombs(str, wstr, max_len);
2004
2019
  }
2005
- free(self->words[i]);
2006
- }
2007
- free(self->words);
2008
- free(self->w_cnt);
2009
- free(self->w_capa);
2010
- free(self);
2020
+ else {
2021
+ char *s = str;
2022
+ while (*s) {
2023
+ *s = tolower(*s);
2024
+ s++;
2025
+ }
2026
+ }
2027
+ free(wstr);
2028
+ str[max_len] = '\0';
2029
+ return str;
2011
2030
  }
2012
2031
 
2013
-
2014
- Phrase *ph_create()
2032
+ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
2015
2033
  {
2016
- Phrase *self = ALLOC(Phrase);
2017
- self->cnt = 0;
2018
- self->capa = PHRASE_INIT_CAPA;
2019
- self->words = ALLOC_N(char **, PHRASE_INIT_CAPA);
2020
- self->w_cnt = ALLOC_N(int, PHRASE_INIT_CAPA);
2021
- self->w_capa = ALLOC_N(int, PHRASE_INIT_CAPA);
2022
- return self;
2034
+ Query *q;
2035
+ bool is_prefix = false;
2036
+ char *p;
2037
+ int len = (int)strlen(pattern);
2038
+
2039
+ if (qp->wild_lower) {
2040
+ lower_str(pattern);
2041
+ }
2042
+
2043
+ /* simplify the wildcard query to a prefix query if possible. Basically a
2044
+ * prefix query is any wildcard query that has a '*' as the last character
2045
+ * and no other wildcard characters before it. */
2046
+ if (pattern[len - 1] == '*') {
2047
+ is_prefix = true;
2048
+ for (p = &pattern[len - 2]; p >= pattern; p--) {
2049
+ if (*p == '*' || *p == '?') {
2050
+ is_prefix = false;
2051
+ break;
2052
+ }
2053
+ }
2054
+ }
2055
+ if (is_prefix) {
2056
+ /* chop off the '*' temporarily to create the query */
2057
+ pattern[len - 1] = 0;
2058
+ q = prefixq_new(field, pattern);
2059
+ pattern[len - 1] = '*';
2060
+ }
2061
+ else {
2062
+ q = wcq_new(field, pattern);
2063
+ }
2064
+ MTQMaxTerms(q) = qp->max_clauses;
2065
+ return q;
2023
2066
  }
2024
2067
 
2025
- Phrase *ph_first_word(char *word)
2068
+ static HashSet *add_field(QParser *qp, char *field)
2026
2069
  {
2027
- Phrase *self = ph_create();
2028
- if (word) { /* no point in adding NULL in start */
2029
- self->words[0] = ALLOC(char *);
2030
- self->words[0][0] = estrdup(word);
2031
- self->w_cnt[0] = self->w_capa[0] = 1;
2032
- self->cnt = 1;
2033
- }
2034
- return self;
2070
+ if (qp->allow_any_fields || hs_exists(qp->all_fields, field)) {
2071
+ hs_add(qp->fields, get_cached_field(qp->field_cache, field));
2072
+ }
2073
+ return qp->fields;
2035
2074
  }
2036
2075
 
2037
- Phrase *ph_add_word(Phrase *self, char *word)
2076
+ static HashSet *first_field(QParser *qp, char *field)
2038
2077
  {
2039
- int i;
2040
- if (self->cnt == self->capa) {
2041
- self->capa <<= 1;
2042
- REALLOC_N(self->words, char **, self->capa);
2043
- REALLOC_N(self->w_cnt, int, self->capa);
2044
- REALLOC_N(self->w_capa, int, self->capa);
2045
- }
2046
- i = self->cnt;
2047
- self->cnt++;
2048
- self->words[i] = ALLOC(char *);
2049
- self->words[i][0] = word ? estrdup(word) : NULL;
2050
- self->w_cnt[i] = self->w_capa[i] = 1;
2051
- return self;
2078
+ qp->fields = qp->fields_buf;
2079
+ qp->fields->size = 0;
2080
+ h_clear(qp->fields->ht);
2081
+ return add_field(qp, field);
2052
2082
  }
2053
2083
 
2054
- Phrase *ph_add_multi_word(Phrase *self, char *word)
2084
+ static void ph_destroy(Phrase *self)
2055
2085
  {
2056
- int i = self->cnt - 1;
2086
+ int i;
2087
+ for (i = 0; i < self->size; i++) {
2088
+ ary_destroy(self->positions[i].terms, &free);
2089
+ }
2090
+ free(self->positions);
2091
+ free(self);
2092
+ }
2057
2093
 
2058
- if (!word) return self; /* no point in adding NULL in multi */
2059
2094
 
2060
- if (self->w_cnt[i] >= self->w_capa[i]) {
2061
- self->w_capa[i] <<= 1;
2062
- REALLOC_N(self->words[i], char *, self->w_capa[i]);
2063
- }
2064
- self->words[i][self->w_cnt[i]] = estrdup(word);
2065
- self->w_cnt[i]++;
2095
+ static Phrase *ph_new()
2096
+ {
2097
+ Phrase *self = ALLOC_AND_ZERO(Phrase);
2098
+ self->capa = PHRASE_INIT_CAPA;
2099
+ self->positions = ALLOC_AND_ZERO_N(PhrasePosition, PHRASE_INIT_CAPA);
2066
2100
  return self;
2067
2101
  }
2068
2102
 
2069
- Query *get_normal_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
2103
+ static Phrase *ph_first_word(char *word)
2070
2104
  {
2071
- int pos_inc = 0;
2072
- int i;
2073
- Token *token;
2074
- TokenStream *stream;
2075
- char *word;
2076
-
2077
- Query *pq = phq_create();
2078
- ((PhraseQuery *)pq->data)->slop = slop;
2079
-
2080
- for (i = 0; i < phrase->cnt; i++) {
2081
- word = phrase->words[i][0];
2082
- if (!word) {
2083
- pos_inc++;
2084
- } else {
2085
- stream = a_get_ts(qp->analyzer, field, word);
2086
- while ((token = ts_next(stream))) {
2087
- phq_add_term(pq, term_create(field, token->text),
2088
- token->pos_inc + pos_inc);
2089
- pos_inc = 0;
2090
- }
2105
+ Phrase *self = ph_new();
2106
+ if (word) { /* no point in adding NULL in start */
2107
+ self->positions[0].terms = ary_new_type_capa(char *, 1);
2108
+ ary_push(self->positions[0].terms, estrdup(word));
2109
+ self->size = 1;
2091
2110
  }
2092
- }
2093
- return pq;
2111
+ return self;
2094
2112
  }
2095
2113
 
2096
- Query *get_multi_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
2114
+ static Phrase *ph_add_word(Phrase *self, char *word)
2097
2115
  {
2098
- int i, j;
2099
- int pos_inc = 0;
2100
- Token *token;
2101
- TokenStream *stream;
2102
- char *word;
2103
- Term **terms = NULL;
2104
- int t_cnt;
2105
-
2106
- Query *mpq = mphq_create();
2107
- ((MultiPhraseQuery *)mpq->data)->slop = slop;
2108
-
2109
- for (i = 0; i < phrase->cnt; i++) {
2110
- word = phrase->words[i][0];
2111
- if (!word) {
2112
- pos_inc++;
2113
- } else {
2114
- t_cnt = phrase->w_cnt[i];
2115
- if (t_cnt > 1) {
2116
- terms = ALLOC_N(Term *, t_cnt);
2117
- for (j = 0; j < t_cnt; j++) {
2118
- word = phrase->words[i][j];
2119
- stream = a_get_ts(qp->analyzer, field, word);
2120
- if ((token = ts_next(stream))) {
2121
- terms[j] = term_create(field, token->text);
2122
- } else {
2123
- t_cnt--; j--;
2124
- }
2125
- }
2126
- /* must advance at least one */
2127
- mphq_add_terms(mpq, terms, t_cnt, pos_inc + 1);
2128
- } else {
2129
- stream = a_get_ts(qp->analyzer, field, word);
2130
- while ((token = ts_next(stream))) {
2131
- terms = ALLOC(Term *);
2132
- terms[0] = term_create(field, token->text);
2133
- mphq_add_terms(mpq, terms, 1, token->pos_inc + pos_inc);
2134
- pos_inc = 0;
2116
+ if (word) {
2117
+ const int index = self->size;
2118
+ PhrasePosition *pp = self->positions;
2119
+ if (index >= self->capa) {
2120
+ self->capa <<= 1;
2121
+ REALLOC_N(pp, PhrasePosition, self->capa);
2122
+ self->positions = pp;
2135
2123
  }
2136
- }
2124
+ pp[index].pos = self->pos_inc;
2125
+ pp[index].terms = ary_new_type_capa(char *, 1);
2126
+ ary_push(pp[index].terms, estrdup(word));
2127
+ self->size++;
2128
+ self->pos_inc = 0;
2129
+ }
2130
+ else {
2131
+ self->pos_inc++;
2137
2132
  }
2138
- }
2139
- return mpq;
2133
+ return self;
2140
2134
  }
2141
2135
 
2142
- Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
2136
+ static Phrase *ph_add_multi_word(Phrase *self, char *word)
2143
2137
  {
2144
- Query *q;
2145
- int i, j;
2146
- int slop;
2147
-
2148
- if (phrase->cnt == 0) {
2149
- q = NULL;
2150
- } else if (phrase->cnt == 1) {
2151
- if (phrase->w_cnt[0] == 1) {
2152
- FLDS(q, get_term_q(qp, field, phrase->words[0][0]));
2153
- } else {
2154
- Query *bq;
2155
- q = bq_create(false);
2156
- for (j = 0; j < phrase->w_cnt[0]; j++) {
2157
- FLDS(bq, tq_create(term_create(field, phrase->words[0][j])));
2158
- if (bq) bq_add_query(q, bq, BC_SHOULD);
2159
- }
2138
+ const int index = self->size - 1;
2139
+ PhrasePosition *pp = self->positions;
2140
+
2141
+ if (word) {
2142
+ ary_push(pp[index].terms, estrdup(word));
2160
2143
  }
2161
- } else {
2162
- bool multi_phrase = false;
2163
- for (i = 0; i < phrase->cnt; i++) {
2164
- if (phrase->w_cnt[i] > 1) multi_phrase = true;
2144
+ return self;
2145
+ }
2146
+
2147
+ static Query *get_phrase_query(QParser *qp, char *field,
2148
+ Phrase *phrase, char *slop_str)
2149
+ {
2150
+ const int pos_cnt = phrase->size;
2151
+ Query *q = NULL;
2152
+
2153
+ if (pos_cnt == 1) {
2154
+ char **words = phrase->positions[0].terms;
2155
+ const int word_count = ary_size(words);
2156
+ if (word_count == 1) {
2157
+ q = get_term_q(qp, field, words[0]);
2158
+ }
2159
+ else {
2160
+ int i;
2161
+ q = bq_new(false);
2162
+ for (i = 0; i < word_count; i++) {
2163
+ bq_add_query_nr(q, get_term_q(qp, field, words[i]), BC_SHOULD);
2164
+ }
2165
+ }
2165
2166
  }
2166
- slop = qp->def_slop;
2167
- if (slop_str) sscanf(slop_str, "%d", &slop);
2168
- if (multi_phrase) {
2169
- FLDS(q, get_multi_phrase_query(qp, field, phrase, slop));
2170
- } else {
2171
- FLDS(q, get_normal_phrase_query(qp, field, phrase, slop));
2167
+ else if (pos_cnt > 1) {
2168
+ Token *token;
2169
+ TokenStream *stream;
2170
+ int i, j;
2171
+ q = phq_new(field);
2172
+ if (slop_str) {
2173
+ int slop;
2174
+ sscanf(slop_str,"%d",&slop);
2175
+ ((PhraseQuery *)q)->slop = slop;
2176
+ }
2177
+
2178
+ for (i = 0; i < pos_cnt; i++) {
2179
+ int pos_inc = phrase->positions[i].pos; /* Actually holds pos_inc */
2180
+ char **words = phrase->positions[i].terms;
2181
+ const int word_count = ary_size(words);
2182
+
2183
+ if (word_count == 1) {
2184
+ stream = get_cached_ts(qp, field, words[0]);
2185
+ while ((token = ts_next(stream))) {
2186
+ phq_add_term(q, token->text, token->pos_inc + pos_inc);
2187
+ pos_inc = 0;
2188
+ }
2189
+ }
2190
+ else {
2191
+ bool added_position = false;
2192
+
2193
+ for (j = 0; j < word_count; j++) {
2194
+ stream = get_cached_ts(qp, field, words[j]);
2195
+ if ((token = ts_next(stream))) {
2196
+ if (!added_position) {
2197
+ phq_add_term(q, token->text, token->pos_inc + pos_inc);
2198
+ added_position = true;
2199
+ }
2200
+ else {
2201
+ phq_append_multi_term(q, token->text);
2202
+ }
2203
+ }
2204
+ }
2205
+ }
2206
+ }
2172
2207
  }
2173
- }
2174
- ph_destroy(phrase);
2175
- return q;
2208
+ return q;
2209
+ }
2210
+
2211
+ static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
2212
+ {
2213
+ Query *q;
2214
+ FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
2215
+ ph_destroy(phrase);
2216
+ return q;
2176
2217
  }
2177
2218
 
2178
- Query *get_range_q(char *field, char *from, char *to, bool inc_lower, bool inc_upper)
2219
+ static Query *get_range_q(const char *field, const char *from, const char *to,
2220
+ bool inc_lower, bool inc_upper)
2179
2221
  {
2180
- return rq_create(field, from, to, inc_lower, inc_upper);
2222
+ return rq_new(field, from, to, inc_lower, inc_upper);
2181
2223
  }
2182
2224
 
2183
2225
  void qp_destroy(QParser *self)
2184
2226
  {
2185
- if (self->close_def_fields) hs_destroy_all(self->def_fields);
2186
- hs_destroy_all(self->all_fields);
2187
- hs_destroy(self->fields_buf);
2188
- a_deref(self->analyzer);
2189
- free(self);
2227
+ if (self->close_def_fields) {
2228
+ hs_destroy(self->def_fields);
2229
+ }
2230
+ hs_destroy(self->all_fields);
2231
+ hs_destroy(self->fields_buf);
2232
+ h_destroy(self->field_cache);
2233
+ h_destroy(self->ts_cache);
2234
+ a_deref(self->analyzer);
2235
+ free(self);
2190
2236
  }
2191
2237
 
2192
- QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2238
+ QParser *qp_new(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2193
2239
  {
2194
- int i;
2195
- QParser *self = ALLOC(QParser);
2196
- self->or_default = true;
2197
- self->wild_lower = true;
2198
- self->clean_str = false;
2199
- self->handle_parse_errors = false;
2200
- self->allow_any_fields = false;
2201
- self->def_slop = 0;
2202
- self->fields_buf = hs_str_create(NULL);
2203
- self->all_fields = all_fields;
2204
- if (def_fields) {
2205
- self->def_fields = def_fields;
2206
- for (i = 0; i < self->def_fields->size; i++) {
2207
- if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2208
- hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2209
- }
2240
+ int i;
2241
+ QParser *self = ALLOC(QParser);
2242
+ self->or_default = true;
2243
+ self->wild_lower = true;
2244
+ self->clean_str = false;
2245
+ self->max_clauses = QP_MAX_CLAUSES;
2246
+ self->handle_parse_errors = false;
2247
+ self->allow_any_fields = false;
2248
+ self->def_slop = 0;
2249
+ self->fields_buf = hs_new_str(NULL);
2250
+ self->all_fields = all_fields;
2251
+ if (def_fields) {
2252
+ self->def_fields = def_fields;
2253
+ for (i = 0; i < self->def_fields->size; i++) {
2254
+ if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2255
+ hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2256
+ }
2257
+ }
2258
+ self->close_def_fields = true;
2210
2259
  }
2211
- self->close_def_fields = true;
2212
- } else {
2213
- self->def_fields = all_fields;
2214
- self->close_def_fields = false;
2215
- }
2216
- self->fields = self->def_fields;
2217
- /* make sure all_fields contains the default fields */
2218
- self->analyzer = analyzer;
2219
- self->buf_index = 0;
2220
- return self;
2260
+ else {
2261
+ self->def_fields = all_fields;
2262
+ self->close_def_fields = false;
2263
+ }
2264
+ self->field_cache = h_new_str((free_ft)NULL, &free);
2265
+ for (i = 0; i < self->all_fields->size; i++) {
2266
+ char *field = estrdup(self->all_fields->elems[i]);
2267
+ h_set(self->field_cache, field, field);
2268
+ }
2269
+ self->fields = self->def_fields;
2270
+ /* make sure all_fields contains the default fields */
2271
+ self->analyzer = analyzer;
2272
+ self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
2273
+ self->buf_index = 0;
2274
+ mutex_init(&self->mutex, NULL);
2275
+ return self;
2221
2276
  }
2222
2277
 
2223
2278
  /* these chars have meaning within phrases */
2224
2279
  static const char *PHRASE_CHARS = "<>|\"";
2225
2280
 
2226
- void str_insert(char *str, int len, char chr)
2281
+ static void str_insert(char *str, int len, char chr)
2227
2282
  {
2228
- memmove(str+1, str, len*sizeof(char));
2229
- *str = chr;
2283
+ memmove(str+1, str, len*sizeof(char));
2284
+ *str = chr;
2230
2285
  }
2231
2286
 
2232
2287
  char *qp_clean_str(char *str)
2233
2288
  {
2234
- int b, pb = -1;
2235
- int br_cnt = 0;
2236
- bool quote_open = false;
2237
- char *sp, *nsp;
2238
-
2239
- /* leave a little extra */
2240
- char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2241
-
2242
- for (sp = str, nsp = new_str; *sp; sp++) {
2243
- b = *sp;
2244
- /* ignore escaped characters */
2245
- if (pb == '\\') {
2246
- if (quote_open && strrchr(PHRASE_CHARS, b)) {
2247
- *nsp++ = '\\'; /* this was left off the first time through */
2248
- }
2249
-
2250
- *nsp++ = b;
2251
- /* \\ has escaped itself so has no power. Assign pb random char : */
2252
- pb = ((b == '\\') ? ':' : b);
2253
- continue;
2254
- }
2255
- switch (b) {
2256
- case '\\':
2257
- if (!quote_open) /* We do our own escaping below */
2258
- *nsp++ = b;
2259
- break;
2260
- case '"':
2261
- quote_open = !quote_open;
2262
- *nsp++ = b;
2263
- break;
2264
- case '(':
2265
- if (!quote_open) {
2266
- br_cnt++;
2267
- } else {
2268
- *nsp++ = '\\';
2269
- }
2270
- *nsp++ = b;
2271
- break;
2272
- case ')':
2273
- if (!quote_open) {
2274
- if (br_cnt == 0) {
2275
- str_insert(new_str, (int)(nsp - new_str), '(');
2276
- nsp++;
2277
- } else {
2278
- br_cnt--;
2279
- }
2280
- } else {
2281
- *nsp++ = '\\';
2282
- }
2283
- *nsp++ = b;
2284
- break;
2285
- case '>':
2286
- if (quote_open) {
2287
- if (pb == '<') {
2288
- /* remove the escape character */
2289
- nsp--;
2290
- nsp[-1] = '<';
2291
- } else {
2292
- *nsp++ = '\\';
2293
- }
2289
+ int b, pb = -1;
2290
+ int br_cnt = 0;
2291
+ bool quote_open = false;
2292
+ char *sp, *nsp;
2293
+
2294
+ /* leave a little extra */
2295
+ char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2296
+
2297
+ for (sp = str, nsp = new_str; *sp; sp++) {
2298
+ b = *sp;
2299
+ /* ignore escaped characters */
2300
+ if (pb == '\\') {
2301
+ if (quote_open && strrchr(PHRASE_CHARS, b)) {
2302
+ *nsp++ = '\\'; /* this was left off the first time through */
2303
+ }
2304
+ *nsp++ = b;
2305
+ /* \\ has escaped itself so has no power. Assign pb random char : */
2306
+ pb = ((b == '\\') ? ':' : b);
2307
+ continue;
2294
2308
  }
2295
- *nsp++ = b;
2296
- break;
2297
- default:
2298
- if (quote_open) {
2299
- if (strrchr(special_char, b) && b != '|') {
2300
- *nsp++ = '\\';
2301
- }
2309
+ switch (b) {
2310
+ case '\\':
2311
+ if (!quote_open) { /* We do our own escaping below */
2312
+ *nsp++ = b;
2313
+ }
2314
+ break;
2315
+ case '"':
2316
+ quote_open = !quote_open;
2317
+ *nsp++ = b;
2318
+ break;
2319
+ case '(':
2320
+ if (!quote_open) {
2321
+ br_cnt++;
2322
+ }
2323
+ else {
2324
+ *nsp++ = '\\';
2325
+ }
2326
+ *nsp++ = b;
2327
+ break;
2328
+ case ')':
2329
+ if (!quote_open) {
2330
+ if (br_cnt == 0) {
2331
+ str_insert(new_str, (int)(nsp - new_str), '(');
2332
+ nsp++;
2333
+ }
2334
+ else {
2335
+ br_cnt--;
2336
+ }
2337
+ }
2338
+ else {
2339
+ *nsp++ = '\\';
2340
+ }
2341
+ *nsp++ = b;
2342
+ break;
2343
+ case '>':
2344
+ if (quote_open) {
2345
+ if (pb == '<') {
2346
+ /* remove the escape character */
2347
+ nsp--;
2348
+ nsp[-1] = '<';
2349
+ }
2350
+ else {
2351
+ *nsp++ = '\\';
2352
+ }
2353
+ }
2354
+ *nsp++ = b;
2355
+ break;
2356
+ default:
2357
+ if (quote_open) {
2358
+ if (strrchr(special_char, b) && b != '|') {
2359
+ *nsp++ = '\\';
2360
+ }
2361
+ }
2362
+ *nsp++ = b;
2302
2363
  }
2303
- *nsp++ = b;
2364
+ pb = b;
2365
+ }
2366
+ if (quote_open) {
2367
+ *nsp++ = '"';
2304
2368
  }
2305
- pb = b;
2306
- }
2307
- if (quote_open) *nsp++ = '"';
2308
- for (;br_cnt > 0; br_cnt--) {
2309
- *nsp++ = ')';
2310
- }
2311
- *nsp = '\0';
2312
- return new_str;
2369
+ for (;br_cnt > 0; br_cnt--) {
2370
+ *nsp++ = ')';
2371
+ }
2372
+ *nsp = '\0';
2373
+ return new_str;
2313
2374
  }
2314
2375
 
2315
2376
  Query *qp_get_bad_query(QParser *qp, char *str)
2316
2377
  {
2317
- Query *q;
2318
- FLDS(q, get_term_q(qp, field, str));
2319
- return q;
2378
+ Query *q;
2379
+ FLDS(q, get_term_q(qp, field, str));
2380
+ return q;
2320
2381
  }
2321
2382
 
2322
2383
  Query *qp_parse(QParser *self, char *qstr)
2323
2384
  {
2324
- if (self->clean_str) {
2325
- self->qstrp = self->qstr = qp_clean_str(qstr);
2326
- } else {
2327
- self->qstrp = self->qstr = qstr;
2328
- }
2329
- self->fields = self->def_fields;
2330
- self->result = NULL;
2331
- yyparse(self);
2332
- if (!self->result && self->handle_parse_errors)
2333
- self->result = qp_get_bad_query(self, self->qstr);
2334
- if (!self->result) self->result = bq_create(false);
2335
- if (self->clean_str) free(self->qstr);
2336
- return self->result;
2385
+ Query *result;
2386
+ mutex_lock(&self->mutex);
2387
+ if (self->clean_str) {
2388
+ self->qstrp = self->qstr = qp_clean_str(qstr);
2389
+ }
2390
+ else {
2391
+ self->qstrp = self->qstr = qstr;
2392
+ }
2393
+ self->fields = self->def_fields;
2394
+ self->result = NULL;
2395
+
2396
+ yyparse(self);
2397
+
2398
+ result = self->result;
2399
+ if (!result && self->handle_parse_errors) {
2400
+ result = qp_get_bad_query(self, self->qstr);
2401
+ }
2402
+ if (!result) {
2403
+ result = bq_new(false);
2404
+ }
2405
+ if (self->clean_str) {
2406
+ free(self->qstr);
2407
+ }
2408
+
2409
+ mutex_unlock(&self->mutex);
2410
+ return result;
2337
2411
  }
2338
2412
 
2339
2413