ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_parser.c CHANGED
@@ -80,19 +80,28 @@
80
80
 
81
81
 
82
82
  /* Copy the first part of user declarations. */
83
- #line 1 "src/query_parser/q_parser.y"
83
+ #line 1 "src/q_parser.y"
84
84
 
85
85
  #include <string.h>
86
+ #include <ctype.h>
87
+ #include <wctype.h>
86
88
  #include "search.h"
89
+ #include "array.h"
87
90
 
88
91
  typedef struct Phrase {
89
- int cnt;
90
- int capa;
91
- char ***words;
92
- int *w_cnt;
93
- int *w_capa;
92
+ int size;
93
+ int capa;
94
+ int pos_inc;
95
+ PhrasePosition *positions;
94
96
  } Phrase;
95
97
 
98
+ #define BCA_INIT_CAPA 4
99
+ typedef struct BCArray {
100
+ int size;
101
+ int capa;
102
+ BooleanClause **clauses;
103
+ } BCArray;
104
+
96
105
 
97
106
 
98
107
  /* Enabling traces. */
@@ -114,17 +123,17 @@ typedef struct Phrase {
114
123
  #endif
115
124
 
116
125
  #if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
117
- #line 14 "src/query_parser/q_parser.y"
126
+ #line 23 "src/q_parser.y"
118
127
  typedef union YYSTYPE {
119
- Query *query;
120
- BooleanClause *bcls;
121
- Array *array;
122
- HashSet *hashset;
123
- Phrase *phrase;
124
- char *str;
128
+ Query *query;
129
+ BooleanClause *bcls;
130
+ BCArray *bclss;
131
+ HashSet *hashset;
132
+ Phrase *phrase;
133
+ char *str;
125
134
  } YYSTYPE;
126
135
  /* Line 196 of yacc.c. */
127
- #line 128 "y.tab.c"
136
+ #line 137 "y.tab.c"
128
137
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
129
138
  # define YYSTYPE_IS_DECLARED 1
130
139
  # define YYSTYPE_IS_TRIVIAL 1
@@ -133,62 +142,58 @@ typedef union YYSTYPE {
133
142
 
134
143
 
135
144
  /* Copy the second part of user declarations. */
136
- #line 22 "src/query_parser/q_parser.y"
145
+ #line 31 "src/q_parser.y"
137
146
 
138
- int yylex(YYSTYPE *lvalp, QParser *qp);
139
- int yyerror(QParser *qp, char const *msg);
147
+ static int yylex(YYSTYPE *lvalp, QParser *qp);
148
+ static int yyerror(QParser *qp, char const *msg);
140
149
 
141
150
  #define PHRASE_INIT_CAPA 4
142
- Query *get_bool_q(Array *bclauses);
151
+ static Query *get_bool_q(BCArray *bca);
143
152
 
144
- Array *first_cls(BooleanClause *cls);
145
- Array *add_and_cls(Array *clauses, BooleanClause *cls);
146
- Array *add_or_cls(Array *clauses, BooleanClause *cls);
147
- Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *cls);
153
+ static BCArray *first_cls(BooleanClause *boolean_clause);
154
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause);
155
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause);
156
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca, BooleanClause *clause);
148
157
 
149
- BooleanClause *get_bool_cls(Query *q, unsigned int occur);
158
+ static BooleanClause *get_bool_cls(Query *q, unsigned int occur);
150
159
 
151
- Query *get_term_q(QParser *qp, char *field, char *word);
152
- Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
153
- Query *get_wild_q(QParser *qp, char *field, char *pattern);
160
+ static Query *get_term_q(QParser *qp, char *field, char *word);
161
+ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop);
162
+ static Query *get_wild_q(QParser *qp, char *field, char *pattern);
154
163
 
155
- HashSet *first_field(QParser *qp, char *field);
156
- HashSet *add_field(QParser *qp, char *field);
164
+ static HashSet *first_field(QParser *qp, char *field);
165
+ static HashSet *add_field(QParser *qp, char *field);
157
166
 
158
- Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
167
+ static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop);
159
168
 
160
- Phrase *ph_first_word(char *word);
161
- Phrase *ph_add_word(Phrase *self, char *word);
162
- Phrase *ph_add_multi_word(Phrase *self, char *word);
169
+ static Phrase *ph_first_word(char *word);
170
+ static Phrase *ph_add_word(Phrase *self, char *word);
171
+ static Phrase *ph_add_multi_word(Phrase *self, char *word);
163
172
 
164
- Query *get_range_q(char *field, char *from, char *to,
165
- bool inc_lower, bool inc_upper);
173
+ static Query *get_range_q(const char *field, const char *from, const char *to,
174
+ bool inc_lower, bool inc_upper);
166
175
 
167
176
  #define FLDS(q, func) do {\
168
- char *field;\
169
- if (qp->fields->size == 0) {\
170
- q = NULL;\
171
- } else if (qp->fields->size == 1) {\
172
- field = (char *)qp->fields->elems[0];\
173
- q = func;\
174
- } else {\
175
- int i;Query *sq;\
176
- q = bq_create(false);\
177
- for (i = 0; i < qp->fields->size; i++) {\
178
- field = (char *)qp->fields->elems[i];\
179
- sq = func;\
180
- if (sq) bq_add_query(q, sq, BC_SHOULD);\
181
- }\
182
- if (((BooleanQuery *)q->data)->clause_cnt == 0) {\
183
- q_deref(q);\
184
- q = NULL;\
177
+ char *field;\
178
+ if (qp->fields->size == 0) {\
179
+ q = NULL;\
180
+ } else if (qp->fields->size == 1) {\
181
+ field = (char *)qp->fields->elems[0];\
182
+ q = func;\
183
+ } else {\
184
+ int i;Query *sq;\
185
+ q = bq_new(false);\
186
+ for (i = 0; i < qp->fields->size; i++) {\
187
+ field = (char *)qp->fields->elems[i];\
188
+ sq = func;\
189
+ if (sq) bq_add_query_nr(q, sq, BC_SHOULD);\
190
+ }\
185
191
  }\
186
- }\
187
192
  } while (0)
188
193
 
189
194
 
190
195
  /* Line 219 of yacc.c. */
191
- #line 192 "y.tab.c"
196
+ #line 197 "y.tab.c"
192
197
 
193
198
  #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
194
199
  # define YYSIZE_T __SIZE_TYPE__
@@ -427,12 +432,12 @@ static const yysigned_char yyrhs[] =
427
432
  /* YYRLINE[YYN] -- source line where rule number YYN was defined. */
428
433
  static const unsigned char yyrline[] =
429
434
  {
430
- 0, 90, 90, 91, 93, 94, 95, 96, 98, 99,
431
- 100, 102, 103, 105, 106, 107, 108, 109, 110, 112,
432
- 113, 114, 116, 118, 118, 120, 120, 120, 123, 124,
433
- 126, 127, 128, 129, 131, 132, 133, 134, 135, 137,
434
- 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
435
- 148
435
+ 0, 95, 95, 96, 98, 99, 100, 101, 103, 104,
436
+ 105, 107, 108, 110, 111, 112, 113, 114, 115, 117,
437
+ 118, 119, 121, 123, 123, 125, 125, 125, 128, 129,
438
+ 131, 132, 133, 134, 136, 137, 138, 139, 140, 142,
439
+ 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
440
+ 153
436
441
  };
437
442
  #endif
438
443
 
@@ -1240,217 +1245,217 @@ yyreduce:
1240
1245
  switch (yyn)
1241
1246
  {
1242
1247
  case 2:
1243
- #line 90 "src/query_parser/q_parser.y"
1248
+ #line 95 "src/q_parser.y"
1244
1249
  { qp->result = (yyval.query) = NULL; }
1245
1250
  break;
1246
1251
 
1247
1252
  case 3:
1248
- #line 91 "src/query_parser/q_parser.y"
1249
- { qp->result = (yyval.query) = get_bool_q((yyvsp[0].array)); }
1253
+ #line 96 "src/q_parser.y"
1254
+ { qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
1250
1255
  break;
1251
1256
 
1252
1257
  case 4:
1253
- #line 93 "src/query_parser/q_parser.y"
1254
- { (yyval.array) = first_cls((yyvsp[0].bcls)); }
1258
+ #line 98 "src/q_parser.y"
1259
+ { (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
1255
1260
  break;
1256
1261
 
1257
1262
  case 5:
1258
- #line 94 "src/query_parser/q_parser.y"
1259
- { (yyval.array) = add_and_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1263
+ #line 99 "src/q_parser.y"
1264
+ { (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
1260
1265
  break;
1261
1266
 
1262
1267
  case 6:
1263
- #line 95 "src/query_parser/q_parser.y"
1264
- { (yyval.array) = add_or_cls((yyvsp[-2].array), (yyvsp[0].bcls)); }
1268
+ #line 100 "src/q_parser.y"
1269
+ { (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
1265
1270
  break;
1266
1271
 
1267
1272
  case 7:
1268
- #line 96 "src/query_parser/q_parser.y"
1269
- { (yyval.array) = add_default_cls(qp, (yyvsp[-1].array), (yyvsp[0].bcls)); }
1273
+ #line 101 "src/q_parser.y"
1274
+ { (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
1270
1275
  break;
1271
1276
 
1272
1277
  case 8:
1273
- #line 98 "src/query_parser/q_parser.y"
1278
+ #line 103 "src/q_parser.y"
1274
1279
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
1275
1280
  break;
1276
1281
 
1277
1282
  case 9:
1278
- #line 99 "src/query_parser/q_parser.y"
1283
+ #line 104 "src/q_parser.y"
1279
1284
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
1280
1285
  break;
1281
1286
 
1282
1287
  case 10:
1283
- #line 100 "src/query_parser/q_parser.y"
1288
+ #line 105 "src/q_parser.y"
1284
1289
  { (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
1285
1290
  break;
1286
1291
 
1287
1292
  case 12:
1288
- #line 103 "src/query_parser/q_parser.y"
1293
+ #line 108 "src/q_parser.y"
1289
1294
  { if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
1290
1295
  break;
1291
1296
 
1292
1297
  case 14:
1293
- #line 106 "src/query_parser/q_parser.y"
1294
- { (yyval.query) = get_bool_q((yyvsp[-1].array)); }
1298
+ #line 111 "src/q_parser.y"
1299
+ { (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
1295
1300
  break;
1296
1301
 
1297
1302
  case 19:
1298
- #line 112 "src/query_parser/q_parser.y"
1303
+ #line 117 "src/q_parser.y"
1299
1304
  { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
1300
1305
  break;
1301
1306
 
1302
1307
  case 20:
1303
- #line 113 "src/query_parser/q_parser.y"
1308
+ #line 118 "src/q_parser.y"
1304
1309
  { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
1305
1310
  break;
1306
1311
 
1307
1312
  case 21:
1308
- #line 114 "src/query_parser/q_parser.y"
1313
+ #line 119 "src/q_parser.y"
1309
1314
  { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
1310
1315
  break;
1311
1316
 
1312
1317
  case 22:
1313
- #line 116 "src/query_parser/q_parser.y"
1318
+ #line 121 "src/q_parser.y"
1314
1319
  { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
1315
1320
  break;
1316
1321
 
1317
1322
  case 23:
1318
- #line 118 "src/query_parser/q_parser.y"
1323
+ #line 123 "src/q_parser.y"
1319
1324
  { qp->fields = qp->def_fields; }
1320
1325
  break;
1321
1326
 
1322
1327
  case 24:
1323
- #line 119 "src/query_parser/q_parser.y"
1328
+ #line 124 "src/q_parser.y"
1324
1329
  { (yyval.query) = (yyvsp[-1].query); }
1325
1330
  break;
1326
1331
 
1327
1332
  case 25:
1328
- #line 120 "src/query_parser/q_parser.y"
1333
+ #line 125 "src/q_parser.y"
1329
1334
  { qp->fields = qp->all_fields; }
1330
1335
  break;
1331
1336
 
1332
1337
  case 26:
1333
- #line 120 "src/query_parser/q_parser.y"
1338
+ #line 125 "src/q_parser.y"
1334
1339
  {qp->fields = qp->def_fields;}
1335
1340
  break;
1336
1341
 
1337
1342
  case 27:
1338
- #line 121 "src/query_parser/q_parser.y"
1343
+ #line 126 "src/q_parser.y"
1339
1344
  { (yyval.query) = (yyvsp[-1].query); }
1340
1345
  break;
1341
1346
 
1342
1347
  case 28:
1343
- #line 123 "src/query_parser/q_parser.y"
1348
+ #line 128 "src/q_parser.y"
1344
1349
  { (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
1345
1350
  break;
1346
1351
 
1347
1352
  case 29:
1348
- #line 124 "src/query_parser/q_parser.y"
1353
+ #line 129 "src/q_parser.y"
1349
1354
  { (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
1350
1355
  break;
1351
1356
 
1352
1357
  case 30:
1353
- #line 126 "src/query_parser/q_parser.y"
1358
+ #line 131 "src/q_parser.y"
1354
1359
  { (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
1355
1360
  break;
1356
1361
 
1357
1362
  case 31:
1358
- #line 127 "src/query_parser/q_parser.y"
1363
+ #line 132 "src/q_parser.y"
1359
1364
  { (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
1360
1365
  break;
1361
1366
 
1362
1367
  case 32:
1363
- #line 128 "src/query_parser/q_parser.y"
1368
+ #line 133 "src/q_parser.y"
1364
1369
  { (yyval.query) = NULL; }
1365
1370
  break;
1366
1371
 
1367
1372
  case 33:
1368
- #line 129 "src/query_parser/q_parser.y"
1373
+ #line 134 "src/q_parser.y"
1369
1374
  { (yyval.query) = NULL; }
1370
1375
  break;
1371
1376
 
1372
1377
  case 34:
1373
- #line 131 "src/query_parser/q_parser.y"
1378
+ #line 136 "src/q_parser.y"
1374
1379
  { (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
1375
1380
  break;
1376
1381
 
1377
1382
  case 35:
1378
- #line 132 "src/query_parser/q_parser.y"
1383
+ #line 137 "src/q_parser.y"
1379
1384
  { (yyval.phrase) = ph_first_word(NULL); }
1380
1385
  break;
1381
1386
 
1382
1387
  case 36:
1383
- #line 133 "src/query_parser/q_parser.y"
1388
+ #line 138 "src/q_parser.y"
1384
1389
  { (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
1385
1390
  break;
1386
1391
 
1387
1392
  case 37:
1388
- #line 134 "src/query_parser/q_parser.y"
1393
+ #line 139 "src/q_parser.y"
1389
1394
  { (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
1390
1395
  break;
1391
1396
 
1392
1397
  case 38:
1393
- #line 135 "src/query_parser/q_parser.y"
1398
+ #line 140 "src/q_parser.y"
1394
1399
  { (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
1395
1400
  break;
1396
1401
 
1397
1402
  case 39:
1398
- #line 137 "src/query_parser/q_parser.y"
1403
+ #line 142 "src/q_parser.y"
1399
1404
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
1400
1405
  break;
1401
1406
 
1402
1407
  case 40:
1403
- #line 138 "src/query_parser/q_parser.y"
1408
+ #line 143 "src/q_parser.y"
1404
1409
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
1405
1410
  break;
1406
1411
 
1407
1412
  case 41:
1408
- #line 139 "src/query_parser/q_parser.y"
1413
+ #line 144 "src/q_parser.y"
1409
1414
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
1410
1415
  break;
1411
1416
 
1412
1417
  case 42:
1413
- #line 140 "src/query_parser/q_parser.y"
1418
+ #line 145 "src/q_parser.y"
1414
1419
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
1415
1420
  break;
1416
1421
 
1417
1422
  case 43:
1418
- #line 141 "src/query_parser/q_parser.y"
1423
+ #line 146 "src/q_parser.y"
1419
1424
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
1420
1425
  break;
1421
1426
 
1422
1427
  case 44:
1423
- #line 142 "src/query_parser/q_parser.y"
1428
+ #line 147 "src/q_parser.y"
1424
1429
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
1425
1430
  break;
1426
1431
 
1427
1432
  case 45:
1428
- #line 143 "src/query_parser/q_parser.y"
1433
+ #line 148 "src/q_parser.y"
1429
1434
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
1430
1435
  break;
1431
1436
 
1432
1437
  case 46:
1433
- #line 144 "src/query_parser/q_parser.y"
1438
+ #line 149 "src/q_parser.y"
1434
1439
  { FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
1435
1440
  break;
1436
1441
 
1437
1442
  case 47:
1438
- #line 145 "src/query_parser/q_parser.y"
1443
+ #line 150 "src/q_parser.y"
1439
1444
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
1440
1445
  break;
1441
1446
 
1442
1447
  case 48:
1443
- #line 146 "src/query_parser/q_parser.y"
1448
+ #line 151 "src/q_parser.y"
1444
1449
  { FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
1445
1450
  break;
1446
1451
 
1447
1452
  case 49:
1448
- #line 147 "src/query_parser/q_parser.y"
1453
+ #line 152 "src/q_parser.y"
1449
1454
  { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
1450
1455
  break;
1451
1456
 
1452
1457
  case 50:
1453
- #line 148 "src/query_parser/q_parser.y"
1458
+ #line 153 "src/q_parser.y"
1454
1459
  { FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
1455
1460
  break;
1456
1461
 
@@ -1459,7 +1464,7 @@ yyreduce:
1459
1464
  }
1460
1465
 
1461
1466
  /* Line 1126 of yacc.c. */
1462
- #line 1463 "y.tab.c"
1467
+ #line 1468 "y.tab.c"
1463
1468
 
1464
1469
  yyvsp -= yylen;
1465
1470
  yyssp -= yylen;
@@ -1727,613 +1732,682 @@ yyreturn:
1727
1732
  }
1728
1733
 
1729
1734
 
1730
- #line 150 "src/query_parser/q_parser.y"
1735
+ #line 155 "src/q_parser.y"
1731
1736
 
1732
1737
 
1733
1738
  const char *special_char = "&:()[]{}!+\"~^-|<>=*?";
1734
1739
  const char *not_word = " \t&:()[]{}!+\"~^-|<>=";
1735
1740
 
1736
- int get_word(YYSTYPE *lvalp, QParser *qp)
1741
+ static int get_word(YYSTYPE *lvalp, QParser *qp)
1737
1742
  {
1738
- bool is_wild = false;
1739
- int len;
1740
- char c;
1741
- char *buf = qp->buf[qp->buf_index];
1742
- char *bufp = buf;
1743
- qp->buf_index = (qp->buf_index + 1) % CONC_WORDS;
1744
-
1745
- qp->qstrp--; /* need to back up one character */
1746
-
1747
- while (!strchr(not_word, (c=*qp->qstrp++))) {
1748
- switch (c) {
1749
- case '\\':
1750
- if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1751
- *bufp++ = '\\';
1752
- } else {
1753
- *bufp++ = c;
1754
- qp->qstrp++;
1743
+ bool is_wild = false;
1744
+ int len;
1745
+ char c;
1746
+ char *buf = qp->buf[qp->buf_index];
1747
+ char *bufp = buf;
1748
+ qp->buf_index = (qp->buf_index + 1) % QP_CONC_WORDS;
1749
+
1750
+ qp->qstrp--; /* need to back up one character */
1751
+
1752
+ while (!strchr(not_word, (c=*qp->qstrp++))) {
1753
+ switch (c) {
1754
+ case '\\':
1755
+ if ((c=*qp->qstrp) == ' ' && c != '\t' && c != '\0') {
1756
+ *bufp++ = '\\';
1757
+ }
1758
+ else {
1759
+ *bufp++ = c;
1760
+ qp->qstrp++;
1761
+ }
1762
+ break;
1763
+ case '*': case '?':
1764
+ is_wild = true;
1765
+ /* fall through */
1766
+ default:
1767
+ *bufp++ = c;
1755
1768
  }
1756
- break;
1757
- case '*': case '?':
1758
- is_wild = true;
1759
- default:
1760
- *bufp++ = c;
1761
1769
  }
1762
- }
1763
- qp->qstrp--;
1764
- /* check for keywords. There are only four so we have a bit of a hack which
1765
- * just checks for all of them. */
1766
- *bufp = '\0';
1767
- len = (int)(bufp - buf);
1768
- if (len == 3) {
1769
- if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1770
- if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1771
- if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1772
- }
1773
- if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1774
-
1775
- /* found a word so return it. */
1776
- lvalp->str = buf;
1777
- if (is_wild) return WILD_STR;
1778
- return WORD;
1770
+ qp->qstrp--;
1771
+ /* check for keywords. There are only four so we have a bit of a hack which
1772
+ * just checks for all of them. */
1773
+ *bufp = '\0';
1774
+ len = (int)(bufp - buf);
1775
+ if (len == 3) {
1776
+ if (buf[0] == 'A' && buf[1] == 'N' && buf[2] == 'D') return AND;
1777
+ if (buf[0] == 'N' && buf[1] == 'O' && buf[2] == 'T') return NOT;
1778
+ if (buf[0] == 'R' && buf[1] == 'E' && buf[2] == 'Q') return REQ;
1779
+ }
1780
+ if (len == 2 && buf[0] == 'O' && buf[1] == 'R') return OR;
1781
+
1782
+ /* found a word so return it. */
1783
+ lvalp->str = buf;
1784
+ if (is_wild) return WILD_STR;
1785
+ return WORD;
1779
1786
  }
1780
1787
 
1781
- int yylex(YYSTYPE *lvalp, QParser *qp)
1788
+ static int yylex(YYSTYPE *lvalp, QParser *qp)
1782
1789
  {
1783
- char c, nc;
1790
+ char c, nc;
1784
1791
 
1785
- while ((c=*qp->qstrp++) == ' ' || c == '\t')
1786
- ;
1787
- if (c == '\0')
1788
- return 0;
1792
+ while ((c=*qp->qstrp++) == ' ' || c == '\t') {
1793
+ }
1789
1794
 
1790
- if (strchr(special_char, c)) { /* comment */
1791
- nc = *qp->qstrp;
1792
- switch (c) {
1793
- case '-': case '!': return NOT;
1794
- case '+': return REQ;
1795
- case '*':
1796
- if (nc == ':') return c;
1797
- break;
1798
- case '&':
1799
- if (nc == '&') {
1800
- qp->qstrp++;
1801
- return AND;
1795
+ if (c == '\0') return 0;
1796
+
1797
+ if (strchr(special_char, c)) { /* comment */
1798
+ nc = *qp->qstrp;
1799
+ switch (c) {
1800
+ case '-': case '!': return NOT;
1801
+ case '+': return REQ;
1802
+ case '*':
1803
+ if (nc == ':') return c;
1804
+ break;
1805
+ case '&':
1806
+ if (nc == '&') {
1807
+ qp->qstrp++;
1808
+ return AND;
1809
+ }
1810
+ break; /* Don't return single & character. Use in word. */
1811
+ case '|':
1812
+ if (nc == '|') {
1813
+ qp->qstrp++;
1814
+ return OR;
1815
+ }
1816
+ default:
1817
+ return c;
1802
1818
  }
1803
- break; /* Don't return single & character. Use in word. */
1804
- case '|':
1805
- if (nc == '|') {
1806
- qp->qstrp++;
1807
- return OR;
1808
- }
1809
- default:
1810
- return c;
1811
1819
  }
1812
- }
1813
1820
 
1814
- return get_word(lvalp, qp);
1821
+ return get_word(lvalp, qp);
1815
1822
  }
1816
1823
 
1817
- int yyerror(QParser *qp, char const *msg)
1824
+ static int yyerror(QParser *qp, char const *msg)
1818
1825
  {
1819
- if (!qp->handle_parse_errors) {
1820
- if (qp->clean_str) free(qp->qstr);
1821
- RAISE(PARSE_ERROR, (char *)msg);
1822
- }
1823
- return 0;
1826
+ if (!qp->handle_parse_errors) {
1827
+ char buf[1024];
1828
+ buf[1023] = '\0';
1829
+ strncpy(buf, qp->qstr, 1023);
1830
+ if (qp->clean_str) {
1831
+ free(qp->qstr);
1832
+ }
1833
+ mutex_unlock(&qp->mutex);
1834
+ RAISE(PARSE_ERROR, "couldn't parse query ``%s''. Error message "
1835
+ " was %se", buf, (char *)msg);
1836
+ }
1837
+ return 0;
1824
1838
  }
1825
1839
 
1840
+ #define BQ(query) ((BooleanQuery *)(query))
1826
1841
 
1827
- Query *get_bool_q(Array *bclauses)
1842
+ static TokenStream *get_cached_ts(QParser *qp, char *field, char *text)
1828
1843
  {
1829
- Query *q;
1830
- BooleanQuery *bq;
1831
- BooleanClause *bc;
1832
-
1833
- if (bclauses->size == 0) {
1834
- ary_destroy(bclauses);
1835
- q = NULL;
1836
- } else if (bclauses->size == 1) {
1837
- bc = (BooleanClause *)bclauses->elems[0];
1838
- q = bc->query;
1839
- free(bc);
1840
- ary_destroy(bclauses);
1841
- } else {
1842
- q = bq_create(false);
1843
- /* copy clauses into query */
1844
- bq = (BooleanQuery *)q->data;
1845
- bq->clause_cnt = bclauses->size;
1846
- bq->clause_capa = bclauses->allocated;
1847
- free(bq->clauses);
1848
- bq->clauses = (BooleanClause **)bclauses->elems;
1849
- free(bclauses);
1850
- }
1851
- return q;
1844
+ TokenStream *ts = h_get(qp->ts_cache, field);
1845
+ if (!ts) {
1846
+ ts = a_get_ts(qp->analyzer, field, text);
1847
+ h_set(qp->ts_cache, estrdup(field), ts);
1848
+ }
1849
+ else {
1850
+ ts->reset(ts, text);
1851
+ }
1852
+ return ts;
1852
1853
  }
1853
1854
 
1854
-
1855
- Array *first_cls(BooleanClause *cls)
1855
+ static char *get_cached_field(HashTable *field_cache, const char *field)
1856
1856
  {
1857
- Array *clauses = ary_create(0, NULL);
1858
- if (cls) ary_append(clauses, cls);
1859
- return clauses;
1857
+ char *cached_field = h_get(field_cache, field);
1858
+ if (!cached_field) {
1859
+ cached_field = estrdup(field);
1860
+ h_set(field_cache, cached_field, cached_field);
1861
+ }
1862
+ return cached_field;
1860
1863
  }
1861
1864
 
1862
- Array *add_and_cls(Array *clauses, BooleanClause *clause)
1865
+ static Query *get_bool_q(BCArray *bca)
1863
1866
  {
1864
- if (clause) {
1865
- BooleanClause *last_cl;
1866
- if (clauses->size == 1) {
1867
- last_cl = clauses->elems[0];
1868
- if (!last_cl->is_prohibited) bc_set_occur(last_cl, BC_MUST);
1869
- }
1867
+ Query *q;
1868
+ const int clause_count = bca->size;
1870
1869
 
1871
- if (!clause->is_prohibited) bc_set_occur(clause, BC_MUST);
1872
- ary_append(clauses, clause);
1873
- }
1874
- return clauses;
1870
+ if (clause_count == 0) {
1871
+ q = NULL;
1872
+ free(bca->clauses);
1873
+ }
1874
+ else if (clause_count == 1) {
1875
+ BooleanClause *bc = bca->clauses[0];
1876
+ q = bc->query;
1877
+ free(bc);
1878
+ free(bca->clauses);
1879
+ }
1880
+ else {
1881
+ q = bq_new(false);
1882
+ /* copy clauses into query */
1883
+
1884
+ BQ(q)->clause_cnt = clause_count;
1885
+ BQ(q)->clause_capa = bca->capa;
1886
+ free(BQ(q)->clauses);
1887
+ BQ(q)->clauses = bca->clauses;
1888
+ }
1889
+ free(bca);
1890
+ return q;
1875
1891
  }
1876
1892
 
1877
- Array *add_or_cls(Array *clauses, BooleanClause *clause)
1893
+ static void bca_add_clause(BCArray *bca, BooleanClause *clause)
1878
1894
  {
1879
- if (clause) ary_append(clauses, clause);
1880
- return clauses;
1895
+ if (bca->size >= bca->capa) {
1896
+ bca->capa <<= 1;
1897
+ REALLOC_N(bca->clauses, BooleanClause *, bca->capa);
1898
+ }
1899
+ bca->clauses[bca->size] = clause;
1900
+ bca->size++;
1881
1901
  }
1882
1902
 
1883
- Array *add_default_cls(QParser *qp, Array *clauses, BooleanClause *clause)
1903
+ static BCArray *first_cls(BooleanClause *clause)
1884
1904
  {
1885
- if (qp->or_default) {
1886
- add_or_cls(clauses, clause);
1887
- } else {
1888
- add_and_cls(clauses, clause);
1889
- }
1890
- return clauses;
1905
+ BCArray *bca = ALLOC_AND_ZERO(BCArray);
1906
+ bca->capa = BCA_INIT_CAPA;
1907
+ bca->clauses = ALLOC_N(BooleanClause *, BCA_INIT_CAPA);
1908
+ if (clause) {
1909
+ bca_add_clause(bca, clause);
1910
+ }
1911
+ return bca;
1891
1912
  }
1892
1913
 
1893
- BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1914
+ static BCArray *add_and_cls(BCArray *bca, BooleanClause *clause)
1894
1915
  {
1895
- if (q) return bc_create(q, occur);
1896
- else return NULL;
1916
+ if (clause) {
1917
+ if (bca->size == 1) {
1918
+ if (!bca->clauses[0]->is_prohibited) {
1919
+ bc_set_occur(bca->clauses[0], BC_MUST);
1920
+ }
1921
+ }
1922
+ if (!clause->is_prohibited) {
1923
+ bc_set_occur(clause, BC_MUST);
1924
+ }
1925
+ bca_add_clause(bca, clause);
1926
+ }
1927
+ return bca;
1897
1928
  }
1898
1929
 
1899
- Query *get_term_q(QParser *qp, char *field, char *word)
1930
+ static BCArray *add_or_cls(BCArray *bca, BooleanClause *clause)
1900
1931
  {
1901
- Query *q;
1902
- Token *token;
1903
- TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1904
-
1905
- if ((token = ts_next(stream)) == NULL) {
1906
- q = NULL;
1907
- } else {
1908
- Term *term = term_create(field, token->text);
1909
- if ((token = ts_next(stream)) == NULL) {
1910
- q = tq_create(term);
1911
- } else {
1912
- q = phq_create();
1913
- phq_add_term(q, term, 0);
1914
- do {
1915
- phq_add_term(q, term_create(field, token->text), token->pos_inc);
1916
- } while ((token = ts_next(stream)) != NULL);
1932
+ if (clause) {
1933
+ bca_add_clause(bca, clause);
1917
1934
  }
1918
- }
1919
- return q;
1935
+ return bca;
1920
1936
  }
1921
1937
 
1922
- Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1938
+ static BCArray *add_default_cls(QParser *qp, BCArray *bca,
1939
+ BooleanClause *clause)
1923
1940
  {
1924
- Query *q;
1925
- Token *token;
1926
- TokenStream *stream = a_get_ts(qp->analyzer, field, word);
1927
-
1928
- if ((token = ts_next(stream)) == NULL) {
1929
- q = NULL;
1930
- } else {
1931
- /* it only makes sense to find one term in a fuzzy query */
1932
- Term *term = term_create(field, token->text);
1933
- if (slop_str) {
1934
- float slop;
1935
- sscanf(slop_str, "%f", &slop);
1936
- q = fuzq_create_mp(term, slop, DEF_PRE_LEN);
1937
- } else {
1938
- q = fuzq_create(term);
1941
+ if (qp->or_default) {
1942
+ add_or_cls(bca, clause);
1943
+ }
1944
+ else {
1945
+ add_and_cls(bca, clause);
1939
1946
  }
1940
- }
1941
- return q;
1947
+ return bca;
1942
1948
  }
1943
1949
 
1944
- Query *get_wild_q(QParser *qp, char *field, char *pattern)
1950
+ static BooleanClause *get_bool_cls(Query *q, unsigned int occur)
1945
1951
  {
1946
- Query *q;
1947
- bool is_prefix = false;
1948
- char *p;
1949
- int len = (int)strlen(pattern);
1950
-
1951
- if (qp->wild_lower) lower_str(pattern);
1952
-
1953
- /* simplify the wildcard query to a prefix query if possible. Basically a
1954
- * prefix query is any wildcard query that has a '*' as the last character
1955
- * and no other wildcard characters before it. */
1956
- if (pattern[len-1] == '*') {
1957
- is_prefix = true;
1958
- for (p = &pattern[len-2]; p >= pattern; p--) {
1959
- if (*p == '*' || *p == '?') {
1960
- is_prefix = false;
1961
- break;
1962
- }
1952
+ if (q) {
1953
+ return bc_new(q, occur);
1954
+ }
1955
+ else {
1956
+ return NULL;
1963
1957
  }
1964
- }
1965
-
1966
- if (is_prefix) {
1967
- /* chop off the '*' temporarily to create the query */
1968
- pattern[len-1] = 0;
1969
- q = prefixq_create(term_create(field, pattern));;
1970
- pattern[len-1] = '*';
1971
- } else {
1972
- q = wcq_create(term_create(field, pattern));;
1973
- }
1974
- return q;
1975
1958
  }
1976
1959
 
1977
- HashSet *add_field(QParser *qp, char *field)
1960
+ static Query *get_term_q(QParser *qp, char *field, char *word)
1978
1961
  {
1979
- char *orig_field;
1980
- if ((orig_field = hs_orig(qp->all_fields, field)) != NULL) {
1981
- hs_add(qp->fields, orig_field);
1982
- } else if (qp->allow_any_fields) {
1983
- field = estrdup(field);
1984
- hs_add(qp->all_fields, field);
1985
- hs_add(qp->fields, field);
1986
- }
1987
- return qp->fields;
1962
+ Query *q;
1963
+ Token *token;
1964
+ TokenStream *stream = get_cached_ts(qp, field, word);
1965
+
1966
+ if ((token = ts_next(stream)) == NULL) {
1967
+ q = NULL;
1968
+ }
1969
+ else {
1970
+ q = tq_new(field, token->text);
1971
+ if ((token = ts_next(stream)) != NULL) {
1972
+ /* Less likely case, destroy the term query and create a
1973
+ * phrase query instead */
1974
+ Query *phq = phq_new(field);
1975
+ phq_add_term(phq, ((TermQuery *)q)->term, 0);
1976
+ q->destroy_i(q);
1977
+ q = phq;
1978
+ do {
1979
+ phq_add_term(q, token->text, token->pos_inc);
1980
+ } while ((token = ts_next(stream)) != NULL);
1981
+ }
1982
+ }
1983
+ return q;
1988
1984
  }
1989
1985
 
1990
- HashSet *first_field(QParser *qp, char *field)
1986
+ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1991
1987
  {
1992
- qp->fields = qp->fields_buf;
1993
- qp->fields->size = 0;
1994
- h_clear(qp->fields->ht);
1995
- return add_field(qp, field);
1988
+ Query *q;
1989
+ Token *token;
1990
+ TokenStream *stream = get_cached_ts(qp, field, word);
1991
+
1992
+ if ((token = ts_next(stream)) == NULL) {
1993
+ q = NULL;
1994
+ }
1995
+ else {
1996
+ /* it only makes sense to find one term in a fuzzy query */
1997
+ float slop = DEF_MIN_SIM;
1998
+ if (slop_str) {
1999
+ sscanf(slop_str, "%f", &slop);
2000
+ }
2001
+ q = fuzq_new_conf(field, token->text, slop, DEF_PRE_LEN,
2002
+ qp->max_clauses);
2003
+ }
2004
+ return q;
1996
2005
  }
1997
2006
 
1998
- void ph_destroy(Phrase *self)
2007
+ static char *lower_str(char *str)
1999
2008
  {
2000
- int i, j;
2001
- for (i = 0; i < self->cnt; i++) {
2002
- for (j = 0; j < self->w_cnt[i]; j++) {
2003
- free(self->words[i][j]);
2009
+ const int max_len = (int)strlen(str) + 1;
2010
+ int cnt;
2011
+ wchar_t *wstr = ALLOC_N(wchar_t, max_len);
2012
+ if ((cnt = mbstowcs(wstr, str, max_len)) > 0) {
2013
+ wchar_t *w = wstr;
2014
+ while (*w) {
2015
+ *w = towlower(*w);
2016
+ w++;
2017
+ }
2018
+ wcstombs(str, wstr, max_len);
2004
2019
  }
2005
- free(self->words[i]);
2006
- }
2007
- free(self->words);
2008
- free(self->w_cnt);
2009
- free(self->w_capa);
2010
- free(self);
2020
+ else {
2021
+ char *s = str;
2022
+ while (*s) {
2023
+ *s = tolower(*s);
2024
+ s++;
2025
+ }
2026
+ }
2027
+ free(wstr);
2028
+ str[max_len] = '\0';
2029
+ return str;
2011
2030
  }
2012
2031
 
2013
-
2014
- Phrase *ph_create()
2032
+ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
2015
2033
  {
2016
- Phrase *self = ALLOC(Phrase);
2017
- self->cnt = 0;
2018
- self->capa = PHRASE_INIT_CAPA;
2019
- self->words = ALLOC_N(char **, PHRASE_INIT_CAPA);
2020
- self->w_cnt = ALLOC_N(int, PHRASE_INIT_CAPA);
2021
- self->w_capa = ALLOC_N(int, PHRASE_INIT_CAPA);
2022
- return self;
2034
+ Query *q;
2035
+ bool is_prefix = false;
2036
+ char *p;
2037
+ int len = (int)strlen(pattern);
2038
+
2039
+ if (qp->wild_lower) {
2040
+ lower_str(pattern);
2041
+ }
2042
+
2043
+ /* simplify the wildcard query to a prefix query if possible. Basically a
2044
+ * prefix query is any wildcard query that has a '*' as the last character
2045
+ * and no other wildcard characters before it. */
2046
+ if (pattern[len - 1] == '*') {
2047
+ is_prefix = true;
2048
+ for (p = &pattern[len - 2]; p >= pattern; p--) {
2049
+ if (*p == '*' || *p == '?') {
2050
+ is_prefix = false;
2051
+ break;
2052
+ }
2053
+ }
2054
+ }
2055
+ if (is_prefix) {
2056
+ /* chop off the '*' temporarily to create the query */
2057
+ pattern[len - 1] = 0;
2058
+ q = prefixq_new(field, pattern);
2059
+ pattern[len - 1] = '*';
2060
+ }
2061
+ else {
2062
+ q = wcq_new(field, pattern);
2063
+ }
2064
+ MTQMaxTerms(q) = qp->max_clauses;
2065
+ return q;
2023
2066
  }
2024
2067
 
2025
- Phrase *ph_first_word(char *word)
2068
+ static HashSet *add_field(QParser *qp, char *field)
2026
2069
  {
2027
- Phrase *self = ph_create();
2028
- if (word) { /* no point in adding NULL in start */
2029
- self->words[0] = ALLOC(char *);
2030
- self->words[0][0] = estrdup(word);
2031
- self->w_cnt[0] = self->w_capa[0] = 1;
2032
- self->cnt = 1;
2033
- }
2034
- return self;
2070
+ if (qp->allow_any_fields || hs_exists(qp->all_fields, field)) {
2071
+ hs_add(qp->fields, get_cached_field(qp->field_cache, field));
2072
+ }
2073
+ return qp->fields;
2035
2074
  }
2036
2075
 
2037
- Phrase *ph_add_word(Phrase *self, char *word)
2076
+ static HashSet *first_field(QParser *qp, char *field)
2038
2077
  {
2039
- int i;
2040
- if (self->cnt == self->capa) {
2041
- self->capa <<= 1;
2042
- REALLOC_N(self->words, char **, self->capa);
2043
- REALLOC_N(self->w_cnt, int, self->capa);
2044
- REALLOC_N(self->w_capa, int, self->capa);
2045
- }
2046
- i = self->cnt;
2047
- self->cnt++;
2048
- self->words[i] = ALLOC(char *);
2049
- self->words[i][0] = word ? estrdup(word) : NULL;
2050
- self->w_cnt[i] = self->w_capa[i] = 1;
2051
- return self;
2078
+ qp->fields = qp->fields_buf;
2079
+ qp->fields->size = 0;
2080
+ h_clear(qp->fields->ht);
2081
+ return add_field(qp, field);
2052
2082
  }
2053
2083
 
2054
- Phrase *ph_add_multi_word(Phrase *self, char *word)
2084
+ static void ph_destroy(Phrase *self)
2055
2085
  {
2056
- int i = self->cnt - 1;
2086
+ int i;
2087
+ for (i = 0; i < self->size; i++) {
2088
+ ary_destroy(self->positions[i].terms, &free);
2089
+ }
2090
+ free(self->positions);
2091
+ free(self);
2092
+ }
2057
2093
 
2058
- if (!word) return self; /* no point in adding NULL in multi */
2059
2094
 
2060
- if (self->w_cnt[i] >= self->w_capa[i]) {
2061
- self->w_capa[i] <<= 1;
2062
- REALLOC_N(self->words[i], char *, self->w_capa[i]);
2063
- }
2064
- self->words[i][self->w_cnt[i]] = estrdup(word);
2065
- self->w_cnt[i]++;
2095
+ static Phrase *ph_new()
2096
+ {
2097
+ Phrase *self = ALLOC_AND_ZERO(Phrase);
2098
+ self->capa = PHRASE_INIT_CAPA;
2099
+ self->positions = ALLOC_AND_ZERO_N(PhrasePosition, PHRASE_INIT_CAPA);
2066
2100
  return self;
2067
2101
  }
2068
2102
 
2069
- Query *get_normal_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
2103
+ static Phrase *ph_first_word(char *word)
2070
2104
  {
2071
- int pos_inc = 0;
2072
- int i;
2073
- Token *token;
2074
- TokenStream *stream;
2075
- char *word;
2076
-
2077
- Query *pq = phq_create();
2078
- ((PhraseQuery *)pq->data)->slop = slop;
2079
-
2080
- for (i = 0; i < phrase->cnt; i++) {
2081
- word = phrase->words[i][0];
2082
- if (!word) {
2083
- pos_inc++;
2084
- } else {
2085
- stream = a_get_ts(qp->analyzer, field, word);
2086
- while ((token = ts_next(stream))) {
2087
- phq_add_term(pq, term_create(field, token->text),
2088
- token->pos_inc + pos_inc);
2089
- pos_inc = 0;
2090
- }
2105
+ Phrase *self = ph_new();
2106
+ if (word) { /* no point in adding NULL in start */
2107
+ self->positions[0].terms = ary_new_type_capa(char *, 1);
2108
+ ary_push(self->positions[0].terms, estrdup(word));
2109
+ self->size = 1;
2091
2110
  }
2092
- }
2093
- return pq;
2111
+ return self;
2094
2112
  }
2095
2113
 
2096
- Query *get_multi_phrase_query(QParser *qp, char *field, Phrase *phrase, int slop)
2114
+ static Phrase *ph_add_word(Phrase *self, char *word)
2097
2115
  {
2098
- int i, j;
2099
- int pos_inc = 0;
2100
- Token *token;
2101
- TokenStream *stream;
2102
- char *word;
2103
- Term **terms = NULL;
2104
- int t_cnt;
2105
-
2106
- Query *mpq = mphq_create();
2107
- ((MultiPhraseQuery *)mpq->data)->slop = slop;
2108
-
2109
- for (i = 0; i < phrase->cnt; i++) {
2110
- word = phrase->words[i][0];
2111
- if (!word) {
2112
- pos_inc++;
2113
- } else {
2114
- t_cnt = phrase->w_cnt[i];
2115
- if (t_cnt > 1) {
2116
- terms = ALLOC_N(Term *, t_cnt);
2117
- for (j = 0; j < t_cnt; j++) {
2118
- word = phrase->words[i][j];
2119
- stream = a_get_ts(qp->analyzer, field, word);
2120
- if ((token = ts_next(stream))) {
2121
- terms[j] = term_create(field, token->text);
2122
- } else {
2123
- t_cnt--; j--;
2124
- }
2125
- }
2126
- /* must advance at least one */
2127
- mphq_add_terms(mpq, terms, t_cnt, pos_inc + 1);
2128
- } else {
2129
- stream = a_get_ts(qp->analyzer, field, word);
2130
- while ((token = ts_next(stream))) {
2131
- terms = ALLOC(Term *);
2132
- terms[0] = term_create(field, token->text);
2133
- mphq_add_terms(mpq, terms, 1, token->pos_inc + pos_inc);
2134
- pos_inc = 0;
2116
+ if (word) {
2117
+ const int index = self->size;
2118
+ PhrasePosition *pp = self->positions;
2119
+ if (index >= self->capa) {
2120
+ self->capa <<= 1;
2121
+ REALLOC_N(pp, PhrasePosition, self->capa);
2122
+ self->positions = pp;
2135
2123
  }
2136
- }
2124
+ pp[index].pos = self->pos_inc;
2125
+ pp[index].terms = ary_new_type_capa(char *, 1);
2126
+ ary_push(pp[index].terms, estrdup(word));
2127
+ self->size++;
2128
+ self->pos_inc = 0;
2129
+ }
2130
+ else {
2131
+ self->pos_inc++;
2137
2132
  }
2138
- }
2139
- return mpq;
2133
+ return self;
2140
2134
  }
2141
2135
 
2142
- Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
2136
+ static Phrase *ph_add_multi_word(Phrase *self, char *word)
2143
2137
  {
2144
- Query *q;
2145
- int i, j;
2146
- int slop;
2147
-
2148
- if (phrase->cnt == 0) {
2149
- q = NULL;
2150
- } else if (phrase->cnt == 1) {
2151
- if (phrase->w_cnt[0] == 1) {
2152
- FLDS(q, get_term_q(qp, field, phrase->words[0][0]));
2153
- } else {
2154
- Query *bq;
2155
- q = bq_create(false);
2156
- for (j = 0; j < phrase->w_cnt[0]; j++) {
2157
- FLDS(bq, tq_create(term_create(field, phrase->words[0][j])));
2158
- if (bq) bq_add_query(q, bq, BC_SHOULD);
2159
- }
2138
+ const int index = self->size - 1;
2139
+ PhrasePosition *pp = self->positions;
2140
+
2141
+ if (word) {
2142
+ ary_push(pp[index].terms, estrdup(word));
2160
2143
  }
2161
- } else {
2162
- bool multi_phrase = false;
2163
- for (i = 0; i < phrase->cnt; i++) {
2164
- if (phrase->w_cnt[i] > 1) multi_phrase = true;
2144
+ return self;
2145
+ }
2146
+
2147
+ static Query *get_phrase_query(QParser *qp, char *field,
2148
+ Phrase *phrase, char *slop_str)
2149
+ {
2150
+ const int pos_cnt = phrase->size;
2151
+ Query *q = NULL;
2152
+
2153
+ if (pos_cnt == 1) {
2154
+ char **words = phrase->positions[0].terms;
2155
+ const int word_count = ary_size(words);
2156
+ if (word_count == 1) {
2157
+ q = get_term_q(qp, field, words[0]);
2158
+ }
2159
+ else {
2160
+ int i;
2161
+ q = bq_new(false);
2162
+ for (i = 0; i < word_count; i++) {
2163
+ bq_add_query_nr(q, get_term_q(qp, field, words[i]), BC_SHOULD);
2164
+ }
2165
+ }
2165
2166
  }
2166
- slop = qp->def_slop;
2167
- if (slop_str) sscanf(slop_str, "%d", &slop);
2168
- if (multi_phrase) {
2169
- FLDS(q, get_multi_phrase_query(qp, field, phrase, slop));
2170
- } else {
2171
- FLDS(q, get_normal_phrase_query(qp, field, phrase, slop));
2167
+ else if (pos_cnt > 1) {
2168
+ Token *token;
2169
+ TokenStream *stream;
2170
+ int i, j;
2171
+ q = phq_new(field);
2172
+ if (slop_str) {
2173
+ int slop;
2174
+ sscanf(slop_str,"%d",&slop);
2175
+ ((PhraseQuery *)q)->slop = slop;
2176
+ }
2177
+
2178
+ for (i = 0; i < pos_cnt; i++) {
2179
+ int pos_inc = phrase->positions[i].pos; /* Actually holds pos_inc */
2180
+ char **words = phrase->positions[i].terms;
2181
+ const int word_count = ary_size(words);
2182
+
2183
+ if (word_count == 1) {
2184
+ stream = get_cached_ts(qp, field, words[0]);
2185
+ while ((token = ts_next(stream))) {
2186
+ phq_add_term(q, token->text, token->pos_inc + pos_inc);
2187
+ pos_inc = 0;
2188
+ }
2189
+ }
2190
+ else {
2191
+ bool added_position = false;
2192
+
2193
+ for (j = 0; j < word_count; j++) {
2194
+ stream = get_cached_ts(qp, field, words[j]);
2195
+ if ((token = ts_next(stream))) {
2196
+ if (!added_position) {
2197
+ phq_add_term(q, token->text, token->pos_inc + pos_inc);
2198
+ added_position = true;
2199
+ }
2200
+ else {
2201
+ phq_append_multi_term(q, token->text);
2202
+ }
2203
+ }
2204
+ }
2205
+ }
2206
+ }
2172
2207
  }
2173
- }
2174
- ph_destroy(phrase);
2175
- return q;
2208
+ return q;
2209
+ }
2210
+
2211
+ static Query *get_phrase_q(QParser *qp, Phrase *phrase, char *slop_str)
2212
+ {
2213
+ Query *q;
2214
+ FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
2215
+ ph_destroy(phrase);
2216
+ return q;
2176
2217
  }
2177
2218
 
2178
- Query *get_range_q(char *field, char *from, char *to, bool inc_lower, bool inc_upper)
2219
+ static Query *get_range_q(const char *field, const char *from, const char *to,
2220
+ bool inc_lower, bool inc_upper)
2179
2221
  {
2180
- return rq_create(field, from, to, inc_lower, inc_upper);
2222
+ return rq_new(field, from, to, inc_lower, inc_upper);
2181
2223
  }
2182
2224
 
2183
2225
  void qp_destroy(QParser *self)
2184
2226
  {
2185
- if (self->close_def_fields) hs_destroy_all(self->def_fields);
2186
- hs_destroy_all(self->all_fields);
2187
- hs_destroy(self->fields_buf);
2188
- a_deref(self->analyzer);
2189
- free(self);
2227
+ if (self->close_def_fields) {
2228
+ hs_destroy(self->def_fields);
2229
+ }
2230
+ hs_destroy(self->all_fields);
2231
+ hs_destroy(self->fields_buf);
2232
+ h_destroy(self->field_cache);
2233
+ h_destroy(self->ts_cache);
2234
+ a_deref(self->analyzer);
2235
+ free(self);
2190
2236
  }
2191
2237
 
2192
- QParser *qp_create(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2238
+ QParser *qp_new(HashSet *all_fields, HashSet *def_fields, Analyzer *analyzer)
2193
2239
  {
2194
- int i;
2195
- QParser *self = ALLOC(QParser);
2196
- self->or_default = true;
2197
- self->wild_lower = true;
2198
- self->clean_str = false;
2199
- self->handle_parse_errors = false;
2200
- self->allow_any_fields = false;
2201
- self->def_slop = 0;
2202
- self->fields_buf = hs_str_create(NULL);
2203
- self->all_fields = all_fields;
2204
- if (def_fields) {
2205
- self->def_fields = def_fields;
2206
- for (i = 0; i < self->def_fields->size; i++) {
2207
- if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2208
- hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2209
- }
2240
+ int i;
2241
+ QParser *self = ALLOC(QParser);
2242
+ self->or_default = true;
2243
+ self->wild_lower = true;
2244
+ self->clean_str = false;
2245
+ self->max_clauses = QP_MAX_CLAUSES;
2246
+ self->handle_parse_errors = false;
2247
+ self->allow_any_fields = false;
2248
+ self->def_slop = 0;
2249
+ self->fields_buf = hs_new_str(NULL);
2250
+ self->all_fields = all_fields;
2251
+ if (def_fields) {
2252
+ self->def_fields = def_fields;
2253
+ for (i = 0; i < self->def_fields->size; i++) {
2254
+ if (!hs_exists(self->all_fields, self->def_fields->elems[i])) {
2255
+ hs_add(self->all_fields, estrdup(self->def_fields->elems[i]));
2256
+ }
2257
+ }
2258
+ self->close_def_fields = true;
2210
2259
  }
2211
- self->close_def_fields = true;
2212
- } else {
2213
- self->def_fields = all_fields;
2214
- self->close_def_fields = false;
2215
- }
2216
- self->fields = self->def_fields;
2217
- /* make sure all_fields contains the default fields */
2218
- self->analyzer = analyzer;
2219
- self->buf_index = 0;
2220
- return self;
2260
+ else {
2261
+ self->def_fields = all_fields;
2262
+ self->close_def_fields = false;
2263
+ }
2264
+ self->field_cache = h_new_str((free_ft)NULL, &free);
2265
+ for (i = 0; i < self->all_fields->size; i++) {
2266
+ char *field = estrdup(self->all_fields->elems[i]);
2267
+ h_set(self->field_cache, field, field);
2268
+ }
2269
+ self->fields = self->def_fields;
2270
+ /* make sure all_fields contains the default fields */
2271
+ self->analyzer = analyzer;
2272
+ self->ts_cache = h_new_str(&free, (free_ft)&ts_deref);
2273
+ self->buf_index = 0;
2274
+ mutex_init(&self->mutex, NULL);
2275
+ return self;
2221
2276
  }
2222
2277
 
2223
2278
  /* these chars have meaning within phrases */
2224
2279
  static const char *PHRASE_CHARS = "<>|\"";
2225
2280
 
2226
- void str_insert(char *str, int len, char chr)
2281
+ static void str_insert(char *str, int len, char chr)
2227
2282
  {
2228
- memmove(str+1, str, len*sizeof(char));
2229
- *str = chr;
2283
+ memmove(str+1, str, len*sizeof(char));
2284
+ *str = chr;
2230
2285
  }
2231
2286
 
2232
2287
  char *qp_clean_str(char *str)
2233
2288
  {
2234
- int b, pb = -1;
2235
- int br_cnt = 0;
2236
- bool quote_open = false;
2237
- char *sp, *nsp;
2238
-
2239
- /* leave a little extra */
2240
- char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2241
-
2242
- for (sp = str, nsp = new_str; *sp; sp++) {
2243
- b = *sp;
2244
- /* ignore escaped characters */
2245
- if (pb == '\\') {
2246
- if (quote_open && strrchr(PHRASE_CHARS, b)) {
2247
- *nsp++ = '\\'; /* this was left off the first time through */
2248
- }
2249
-
2250
- *nsp++ = b;
2251
- /* \\ has escaped itself so has no power. Assign pb random char : */
2252
- pb = ((b == '\\') ? ':' : b);
2253
- continue;
2254
- }
2255
- switch (b) {
2256
- case '\\':
2257
- if (!quote_open) /* We do our own escaping below */
2258
- *nsp++ = b;
2259
- break;
2260
- case '"':
2261
- quote_open = !quote_open;
2262
- *nsp++ = b;
2263
- break;
2264
- case '(':
2265
- if (!quote_open) {
2266
- br_cnt++;
2267
- } else {
2268
- *nsp++ = '\\';
2269
- }
2270
- *nsp++ = b;
2271
- break;
2272
- case ')':
2273
- if (!quote_open) {
2274
- if (br_cnt == 0) {
2275
- str_insert(new_str, (int)(nsp - new_str), '(');
2276
- nsp++;
2277
- } else {
2278
- br_cnt--;
2279
- }
2280
- } else {
2281
- *nsp++ = '\\';
2282
- }
2283
- *nsp++ = b;
2284
- break;
2285
- case '>':
2286
- if (quote_open) {
2287
- if (pb == '<') {
2288
- /* remove the escape character */
2289
- nsp--;
2290
- nsp[-1] = '<';
2291
- } else {
2292
- *nsp++ = '\\';
2293
- }
2289
+ int b, pb = -1;
2290
+ int br_cnt = 0;
2291
+ bool quote_open = false;
2292
+ char *sp, *nsp;
2293
+
2294
+ /* leave a little extra */
2295
+ char *new_str = ALLOC_N(char, strlen(str)*2 + 1);
2296
+
2297
+ for (sp = str, nsp = new_str; *sp; sp++) {
2298
+ b = *sp;
2299
+ /* ignore escaped characters */
2300
+ if (pb == '\\') {
2301
+ if (quote_open && strrchr(PHRASE_CHARS, b)) {
2302
+ *nsp++ = '\\'; /* this was left off the first time through */
2303
+ }
2304
+ *nsp++ = b;
2305
+ /* \\ has escaped itself so has no power. Assign pb random char : */
2306
+ pb = ((b == '\\') ? ':' : b);
2307
+ continue;
2294
2308
  }
2295
- *nsp++ = b;
2296
- break;
2297
- default:
2298
- if (quote_open) {
2299
- if (strrchr(special_char, b) && b != '|') {
2300
- *nsp++ = '\\';
2301
- }
2309
+ switch (b) {
2310
+ case '\\':
2311
+ if (!quote_open) { /* We do our own escaping below */
2312
+ *nsp++ = b;
2313
+ }
2314
+ break;
2315
+ case '"':
2316
+ quote_open = !quote_open;
2317
+ *nsp++ = b;
2318
+ break;
2319
+ case '(':
2320
+ if (!quote_open) {
2321
+ br_cnt++;
2322
+ }
2323
+ else {
2324
+ *nsp++ = '\\';
2325
+ }
2326
+ *nsp++ = b;
2327
+ break;
2328
+ case ')':
2329
+ if (!quote_open) {
2330
+ if (br_cnt == 0) {
2331
+ str_insert(new_str, (int)(nsp - new_str), '(');
2332
+ nsp++;
2333
+ }
2334
+ else {
2335
+ br_cnt--;
2336
+ }
2337
+ }
2338
+ else {
2339
+ *nsp++ = '\\';
2340
+ }
2341
+ *nsp++ = b;
2342
+ break;
2343
+ case '>':
2344
+ if (quote_open) {
2345
+ if (pb == '<') {
2346
+ /* remove the escape character */
2347
+ nsp--;
2348
+ nsp[-1] = '<';
2349
+ }
2350
+ else {
2351
+ *nsp++ = '\\';
2352
+ }
2353
+ }
2354
+ *nsp++ = b;
2355
+ break;
2356
+ default:
2357
+ if (quote_open) {
2358
+ if (strrchr(special_char, b) && b != '|') {
2359
+ *nsp++ = '\\';
2360
+ }
2361
+ }
2362
+ *nsp++ = b;
2302
2363
  }
2303
- *nsp++ = b;
2364
+ pb = b;
2365
+ }
2366
+ if (quote_open) {
2367
+ *nsp++ = '"';
2304
2368
  }
2305
- pb = b;
2306
- }
2307
- if (quote_open) *nsp++ = '"';
2308
- for (;br_cnt > 0; br_cnt--) {
2309
- *nsp++ = ')';
2310
- }
2311
- *nsp = '\0';
2312
- return new_str;
2369
+ for (;br_cnt > 0; br_cnt--) {
2370
+ *nsp++ = ')';
2371
+ }
2372
+ *nsp = '\0';
2373
+ return new_str;
2313
2374
  }
2314
2375
 
2315
2376
  Query *qp_get_bad_query(QParser *qp, char *str)
2316
2377
  {
2317
- Query *q;
2318
- FLDS(q, get_term_q(qp, field, str));
2319
- return q;
2378
+ Query *q;
2379
+ FLDS(q, get_term_q(qp, field, str));
2380
+ return q;
2320
2381
  }
2321
2382
 
2322
2383
  Query *qp_parse(QParser *self, char *qstr)
2323
2384
  {
2324
- if (self->clean_str) {
2325
- self->qstrp = self->qstr = qp_clean_str(qstr);
2326
- } else {
2327
- self->qstrp = self->qstr = qstr;
2328
- }
2329
- self->fields = self->def_fields;
2330
- self->result = NULL;
2331
- yyparse(self);
2332
- if (!self->result && self->handle_parse_errors)
2333
- self->result = qp_get_bad_query(self, self->qstr);
2334
- if (!self->result) self->result = bq_create(false);
2335
- if (self->clean_str) free(self->qstr);
2336
- return self->result;
2385
+ Query *result;
2386
+ mutex_lock(&self->mutex);
2387
+ if (self->clean_str) {
2388
+ self->qstrp = self->qstr = qp_clean_str(qstr);
2389
+ }
2390
+ else {
2391
+ self->qstrp = self->qstr = qstr;
2392
+ }
2393
+ self->fields = self->def_fields;
2394
+ self->result = NULL;
2395
+
2396
+ yyparse(self);
2397
+
2398
+ result = self->result;
2399
+ if (!result && self->handle_parse_errors) {
2400
+ result = qp_get_bad_query(self, self->qstr);
2401
+ }
2402
+ if (!result) {
2403
+ result = bq_new(false);
2404
+ }
2405
+ if (self->clean_str) {
2406
+ free(self->qstr);
2407
+ }
2408
+
2409
+ mutex_unlock(&self->mutex);
2410
+ return result;
2337
2411
  }
2338
2412
 
2339
2413