isomorfeus-ferret 0.12.6 → 0.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -16
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
- data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +113 -58
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -155,12 +155,15 @@
|
|
155
155
|
|
156
156
|
#include <string.h>
|
157
157
|
#include <ctype.h>
|
158
|
-
#include <wctype.h>
|
159
158
|
#include <assert.h>
|
160
159
|
#include "frt_global.h"
|
161
160
|
#include "frt_except.h"
|
162
161
|
#include "frt_search.h"
|
163
162
|
#include "frt_array.h"
|
163
|
+
#include <ruby/encoding.h>
|
164
|
+
|
165
|
+
extern rb_encoding *utf8_encoding;
|
166
|
+
extern int utf8_mbmaxlen;
|
164
167
|
|
165
168
|
typedef struct Phrase {
|
166
169
|
int size;
|
@@ -180,7 +183,7 @@ float frt_qp_default_fuzzy_min_sim = 0.5;
|
|
180
183
|
int frt_qp_default_fuzzy_pre_len = 0;
|
181
184
|
|
182
185
|
|
183
|
-
#line
|
186
|
+
#line 187 "frt_q_parser.c"
|
184
187
|
|
185
188
|
# ifndef YY_CAST
|
186
189
|
# ifdef __cplusplus
|
@@ -237,7 +240,7 @@ extern int yydebug;
|
|
237
240
|
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
238
241
|
union YYSTYPE
|
239
242
|
{
|
240
|
-
#line
|
243
|
+
#line 116 "frt_q_parser.y"
|
241
244
|
|
242
245
|
FrtQuery *query;
|
243
246
|
FrtBooleanClause *bcls;
|
@@ -246,7 +249,7 @@ union YYSTYPE
|
|
246
249
|
Phrase *phrase;
|
247
250
|
char *str;
|
248
251
|
|
249
|
-
#line
|
252
|
+
#line 253 "frt_q_parser.c"
|
250
253
|
|
251
254
|
};
|
252
255
|
typedef union YYSTYPE YYSTYPE;
|
@@ -257,7 +260,7 @@ typedef union YYSTYPE YYSTYPE;
|
|
257
260
|
|
258
261
|
|
259
262
|
|
260
|
-
int yyparse (FrtQParser *qp);
|
263
|
+
int yyparse (FrtQParser *qp, rb_encoding *encoding);
|
261
264
|
|
262
265
|
|
263
266
|
|
@@ -312,10 +315,10 @@ typedef enum yysymbol_kind_t yysymbol_kind_t;
|
|
312
315
|
|
313
316
|
|
314
317
|
/* Second part of user prologue. */
|
315
|
-
#line
|
318
|
+
#line 124 "frt_q_parser.y"
|
316
319
|
|
317
320
|
static int yylex(YYSTYPE *lvalp, FrtQParser *qp);
|
318
|
-
static int yyerror(FrtQParser *qp, char const *msg);
|
321
|
+
static int yyerror(FrtQParser *qp, rb_encoding *encoding, char const *msg);
|
319
322
|
|
320
323
|
#define PHRASE_INIT_CAPA 4
|
321
324
|
static FrtQuery *get_bool_q(FrtBCArray *bca);
|
@@ -323,29 +326,26 @@ static FrtQuery *get_bool_q(FrtBCArray *bca);
|
|
323
326
|
static FrtBCArray *first_cls(FrtBooleanClause *boolean_clause);
|
324
327
|
static FrtBCArray *add_and_cls(FrtBCArray *bca, FrtBooleanClause *clause);
|
325
328
|
static FrtBCArray *add_or_cls(FrtBCArray *bca, FrtBooleanClause *clause);
|
326
|
-
static FrtBCArray *add_default_cls(FrtQParser *qp, FrtBCArray *bca,
|
327
|
-
FrtBooleanClause *clause);
|
329
|
+
static FrtBCArray *add_default_cls(FrtQParser *qp, FrtBCArray *bca, FrtBooleanClause *clause);
|
328
330
|
static void bca_destroy(FrtBCArray *bca);
|
329
331
|
|
330
332
|
static FrtBooleanClause *get_bool_cls(FrtQuery *q, FrtBCType occur);
|
331
333
|
|
332
|
-
static FrtQuery *get_term_q(FrtQParser *qp,
|
333
|
-
static FrtQuery *get_fuzzy_q(FrtQParser *qp,
|
334
|
-
|
335
|
-
static FrtQuery *get_wild_q(FrtQParser *qp, FrtSymbol field, char *pattern);
|
334
|
+
static FrtQuery *get_term_q(FrtQParser *qp, ID field, char *word, rb_encoding *encoding);
|
335
|
+
static FrtQuery *get_fuzzy_q(FrtQParser *qp, ID field, char *word, char *slop, rb_encoding *encoding);
|
336
|
+
static FrtQuery *get_wild_q(FrtQParser *qp, ID field, char *pattern, rb_encoding *encoding);
|
336
337
|
|
337
338
|
static FrtHashSet *first_field(FrtQParser *qp, const char *field_name);
|
338
339
|
static FrtHashSet *add_field(FrtQParser *qp, const char *field_name);
|
339
340
|
|
340
|
-
static FrtQuery *get_phrase_q(FrtQParser *qp, Phrase *phrase, char *slop);
|
341
|
+
static FrtQuery *get_phrase_q(FrtQParser *qp, Phrase *phrase, char *slop, rb_encoding *encoding);
|
341
342
|
|
342
343
|
static Phrase *ph_first_word(char *word);
|
343
344
|
static Phrase *ph_add_word(Phrase *self, char *word);
|
344
345
|
static Phrase *ph_add_multi_word(Phrase *self, char *word);
|
345
346
|
static void ph_destroy(Phrase *self);
|
346
347
|
|
347
|
-
static FrtQuery *get_r_q(FrtQParser *qp,
|
348
|
-
bool inc_lower, bool inc_upper);
|
348
|
+
static FrtQuery *get_r_q(FrtQParser *qp, ID field, char *from, char *to, bool inc_lower, bool inc_upper, rb_encoding *encoding);
|
349
349
|
|
350
350
|
static void qp_push_fields(FrtQParser *self, FrtHashSet *fields, bool destroy);
|
351
351
|
static void qp_pop_fields(FrtQParser *self);
|
@@ -359,17 +359,17 @@ static void qp_pop_fields(FrtQParser *self);
|
|
359
359
|
*/
|
360
360
|
#define FLDS(q, func) do {\
|
361
361
|
FRT_TRY {\
|
362
|
-
|
362
|
+
ID field;\
|
363
363
|
if (qp->fields->size == 0) {\
|
364
364
|
q = NULL;\
|
365
365
|
} else if (qp->fields->size == 1) {\
|
366
|
-
field = (
|
366
|
+
field = (ID)qp->fields->first->elem;\
|
367
367
|
q = func;\
|
368
368
|
} else {\
|
369
369
|
FrtQuery *volatile sq; FrtHashSetEntry *volatile hse;\
|
370
370
|
q = frt_bq_new_max(false, qp->max_clauses);\
|
371
371
|
for (hse = qp->fields->first; hse; hse = hse->next) {\
|
372
|
-
field = (
|
372
|
+
field = (ID)hse->elem;\
|
373
373
|
sq = func;\
|
374
374
|
FRT_TRY\
|
375
375
|
if (sq) frt_bq_add_query_nr(q, sq, FRT_BC_SHOULD);\
|
@@ -969,7 +969,7 @@ enum { YYENOMEM = -2 };
|
|
969
969
|
} \
|
970
970
|
else \
|
971
971
|
{ \
|
972
|
-
yyerror (qp, YY_("syntax error: cannot back up")); \
|
972
|
+
yyerror (qp, encoding, YY_("syntax error: cannot back up")); \
|
973
973
|
YYERROR; \
|
974
974
|
} \
|
975
975
|
while (0)
|
@@ -1002,7 +1002,7 @@ do { \
|
|
1002
1002
|
{ \
|
1003
1003
|
YYFPRINTF (stderr, "%s ", Title); \
|
1004
1004
|
yy_symbol_print (stderr, \
|
1005
|
-
Kind, Value, qp); \
|
1005
|
+
Kind, Value, qp, encoding); \
|
1006
1006
|
YYFPRINTF (stderr, "\n"); \
|
1007
1007
|
} \
|
1008
1008
|
} while (0)
|
@@ -1014,11 +1014,12 @@ do { \
|
|
1014
1014
|
|
1015
1015
|
static void
|
1016
1016
|
yy_symbol_value_print (FILE *yyo,
|
1017
|
-
yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, FrtQParser *qp)
|
1017
|
+
yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, FrtQParser *qp, rb_encoding *encoding)
|
1018
1018
|
{
|
1019
1019
|
FILE *yyoutput = yyo;
|
1020
1020
|
YY_USE (yyoutput);
|
1021
1021
|
YY_USE (qp);
|
1022
|
+
YY_USE (encoding);
|
1022
1023
|
if (!yyvaluep)
|
1023
1024
|
return;
|
1024
1025
|
YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
|
@@ -1033,12 +1034,12 @@ yy_symbol_value_print (FILE *yyo,
|
|
1033
1034
|
|
1034
1035
|
static void
|
1035
1036
|
yy_symbol_print (FILE *yyo,
|
1036
|
-
yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, FrtQParser *qp)
|
1037
|
+
yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, FrtQParser *qp, rb_encoding *encoding)
|
1037
1038
|
{
|
1038
1039
|
YYFPRINTF (yyo, "%s %s (",
|
1039
1040
|
yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind));
|
1040
1041
|
|
1041
|
-
yy_symbol_value_print (yyo, yykind, yyvaluep, qp);
|
1042
|
+
yy_symbol_value_print (yyo, yykind, yyvaluep, qp, encoding);
|
1042
1043
|
YYFPRINTF (yyo, ")");
|
1043
1044
|
}
|
1044
1045
|
|
@@ -1072,7 +1073,7 @@ do { \
|
|
1072
1073
|
|
1073
1074
|
static void
|
1074
1075
|
yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp,
|
1075
|
-
int yyrule, FrtQParser *qp)
|
1076
|
+
int yyrule, FrtQParser *qp, rb_encoding *encoding)
|
1076
1077
|
{
|
1077
1078
|
int yylno = yyrline[yyrule];
|
1078
1079
|
int yynrhs = yyr2[yyrule];
|
@@ -1085,7 +1086,7 @@ yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp,
|
|
1085
1086
|
YYFPRINTF (stderr, " $%d = ", yyi + 1);
|
1086
1087
|
yy_symbol_print (stderr,
|
1087
1088
|
YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]),
|
1088
|
-
&yyvsp[(yyi + 1) - (yynrhs)], qp);
|
1089
|
+
&yyvsp[(yyi + 1) - (yynrhs)], qp, encoding);
|
1089
1090
|
YYFPRINTF (stderr, "\n");
|
1090
1091
|
}
|
1091
1092
|
}
|
@@ -1093,7 +1094,7 @@ yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp,
|
|
1093
1094
|
# define YY_REDUCE_PRINT(Rule) \
|
1094
1095
|
do { \
|
1095
1096
|
if (yydebug) \
|
1096
|
-
yy_reduce_print (yyssp, yyvsp, Rule, qp); \
|
1097
|
+
yy_reduce_print (yyssp, yyvsp, Rule, qp, encoding); \
|
1097
1098
|
} while (0)
|
1098
1099
|
|
1099
1100
|
/* Nonzero means print parse trace. It is left uninitialized so that
|
@@ -1134,10 +1135,11 @@ int yydebug;
|
|
1134
1135
|
|
1135
1136
|
static void
|
1136
1137
|
yydestruct (const char *yymsg,
|
1137
|
-
yysymbol_kind_t yykind, YYSTYPE *yyvaluep, FrtQParser *qp)
|
1138
|
+
yysymbol_kind_t yykind, YYSTYPE *yyvaluep, FrtQParser *qp, rb_encoding *encoding)
|
1138
1139
|
{
|
1139
1140
|
YY_USE (yyvaluep);
|
1140
1141
|
YY_USE (qp);
|
1142
|
+
YY_USE (encoding);
|
1141
1143
|
if (!yymsg)
|
1142
1144
|
yymsg = "Deleting";
|
1143
1145
|
YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp);
|
@@ -1148,67 +1150,67 @@ yydestruct (const char *yymsg,
|
|
1148
1150
|
case YYSYMBOL_bool_q: /* bool_q */
|
1149
1151
|
#line 221 "frt_q_parser.y"
|
1150
1152
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1151
|
-
#line
|
1153
|
+
#line 1154 "frt_q_parser.c"
|
1152
1154
|
break;
|
1153
1155
|
|
1154
1156
|
case YYSYMBOL_bool_clss: /* bool_clss */
|
1155
1157
|
#line 223 "frt_q_parser.y"
|
1156
1158
|
{ if (((*yyvaluep).bclss) && qp->destruct) bca_destroy(((*yyvaluep).bclss)); }
|
1157
|
-
#line
|
1159
|
+
#line 1160 "frt_q_parser.c"
|
1158
1160
|
break;
|
1159
1161
|
|
1160
1162
|
case YYSYMBOL_bool_cls: /* bool_cls */
|
1161
1163
|
#line 222 "frt_q_parser.y"
|
1162
1164
|
{ if (((*yyvaluep).bcls) && qp->destruct) frt_bc_deref(((*yyvaluep).bcls)); }
|
1163
|
-
#line
|
1165
|
+
#line 1166 "frt_q_parser.c"
|
1164
1166
|
break;
|
1165
1167
|
|
1166
1168
|
case YYSYMBOL_boosted_q: /* boosted_q */
|
1167
1169
|
#line 221 "frt_q_parser.y"
|
1168
1170
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1169
|
-
#line
|
1171
|
+
#line 1172 "frt_q_parser.c"
|
1170
1172
|
break;
|
1171
1173
|
|
1172
1174
|
case YYSYMBOL_q: /* q */
|
1173
1175
|
#line 221 "frt_q_parser.y"
|
1174
1176
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1175
|
-
#line
|
1177
|
+
#line 1178 "frt_q_parser.c"
|
1176
1178
|
break;
|
1177
1179
|
|
1178
1180
|
case YYSYMBOL_term_q: /* term_q */
|
1179
1181
|
#line 221 "frt_q_parser.y"
|
1180
1182
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1181
|
-
#line
|
1183
|
+
#line 1184 "frt_q_parser.c"
|
1182
1184
|
break;
|
1183
1185
|
|
1184
1186
|
case YYSYMBOL_wild_q: /* wild_q */
|
1185
1187
|
#line 221 "frt_q_parser.y"
|
1186
1188
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1187
|
-
#line
|
1189
|
+
#line 1190 "frt_q_parser.c"
|
1188
1190
|
break;
|
1189
1191
|
|
1190
1192
|
case YYSYMBOL_field_q: /* field_q */
|
1191
1193
|
#line 221 "frt_q_parser.y"
|
1192
1194
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1193
|
-
#line
|
1195
|
+
#line 1196 "frt_q_parser.c"
|
1194
1196
|
break;
|
1195
1197
|
|
1196
1198
|
case YYSYMBOL_phrase_q: /* phrase_q */
|
1197
1199
|
#line 221 "frt_q_parser.y"
|
1198
1200
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1199
|
-
#line
|
1201
|
+
#line 1202 "frt_q_parser.c"
|
1200
1202
|
break;
|
1201
1203
|
|
1202
1204
|
case YYSYMBOL_ph_words: /* ph_words */
|
1203
1205
|
#line 224 "frt_q_parser.y"
|
1204
1206
|
{ if (((*yyvaluep).phrase) && qp->destruct) ph_destroy(((*yyvaluep).phrase)); }
|
1205
|
-
#line
|
1207
|
+
#line 1208 "frt_q_parser.c"
|
1206
1208
|
break;
|
1207
1209
|
|
1208
1210
|
case YYSYMBOL_range_q: /* range_q */
|
1209
1211
|
#line 221 "frt_q_parser.y"
|
1210
1212
|
{ if (((*yyvaluep).query) && qp->destruct) frt_q_deref(((*yyvaluep).query)); }
|
1211
|
-
#line
|
1213
|
+
#line 1214 "frt_q_parser.c"
|
1212
1214
|
break;
|
1213
1215
|
|
1214
1216
|
default:
|
@@ -1227,7 +1229,7 @@ yydestruct (const char *yymsg,
|
|
1227
1229
|
`----------*/
|
1228
1230
|
|
1229
1231
|
int
|
1230
|
-
yyparse (FrtQParser *qp)
|
1232
|
+
yyparse (FrtQParser *qp, rb_encoding *encoding)
|
1231
1233
|
{
|
1232
1234
|
/* Lookahead token kind. */
|
1233
1235
|
int yychar;
|
@@ -1484,269 +1486,269 @@ yyreduce:
|
|
1484
1486
|
case 2: /* bool_q: %empty */
|
1485
1487
|
#line 226 "frt_q_parser.y"
|
1486
1488
|
{ qp->result = (yyval.query) = NULL; }
|
1487
|
-
#line
|
1489
|
+
#line 1490 "frt_q_parser.c"
|
1488
1490
|
break;
|
1489
1491
|
|
1490
1492
|
case 3: /* bool_q: bool_clss */
|
1491
1493
|
#line 227 "frt_q_parser.y"
|
1492
1494
|
{ T qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); E }
|
1493
|
-
#line
|
1495
|
+
#line 1496 "frt_q_parser.c"
|
1494
1496
|
break;
|
1495
1497
|
|
1496
1498
|
case 4: /* bool_clss: bool_cls */
|
1497
1499
|
#line 229 "frt_q_parser.y"
|
1498
1500
|
{ T (yyval.bclss) = first_cls((yyvsp[0].bcls)); E }
|
1499
|
-
#line
|
1501
|
+
#line 1502 "frt_q_parser.c"
|
1500
1502
|
break;
|
1501
1503
|
|
1502
1504
|
case 5: /* bool_clss: bool_clss AND bool_cls */
|
1503
1505
|
#line 230 "frt_q_parser.y"
|
1504
1506
|
{ T (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); E }
|
1505
|
-
#line
|
1507
|
+
#line 1508 "frt_q_parser.c"
|
1506
1508
|
break;
|
1507
1509
|
|
1508
1510
|
case 6: /* bool_clss: bool_clss OR bool_cls */
|
1509
1511
|
#line 231 "frt_q_parser.y"
|
1510
1512
|
{ T (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); E }
|
1511
|
-
#line
|
1513
|
+
#line 1514 "frt_q_parser.c"
|
1512
1514
|
break;
|
1513
1515
|
|
1514
1516
|
case 7: /* bool_clss: bool_clss bool_cls */
|
1515
1517
|
#line 232 "frt_q_parser.y"
|
1516
1518
|
{ T (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); E }
|
1517
|
-
#line
|
1519
|
+
#line 1520 "frt_q_parser.c"
|
1518
1520
|
break;
|
1519
1521
|
|
1520
1522
|
case 8: /* bool_cls: REQ boosted_q */
|
1521
1523
|
#line 234 "frt_q_parser.y"
|
1522
1524
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[0].query), FRT_BC_MUST); E }
|
1523
|
-
#line
|
1525
|
+
#line 1526 "frt_q_parser.c"
|
1524
1526
|
break;
|
1525
1527
|
|
1526
1528
|
case 9: /* bool_cls: NOT boosted_q */
|
1527
1529
|
#line 235 "frt_q_parser.y"
|
1528
1530
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[0].query), FRT_BC_MUST_NOT); E }
|
1529
|
-
#line
|
1531
|
+
#line 1532 "frt_q_parser.c"
|
1530
1532
|
break;
|
1531
1533
|
|
1532
1534
|
case 10: /* bool_cls: boosted_q */
|
1533
1535
|
#line 236 "frt_q_parser.y"
|
1534
1536
|
{ T (yyval.bcls) = get_bool_cls((yyvsp[0].query), FRT_BC_SHOULD); E }
|
1535
|
-
#line
|
1537
|
+
#line 1538 "frt_q_parser.c"
|
1536
1538
|
break;
|
1537
1539
|
|
1538
1540
|
case 12: /* boosted_q: q '^' QWRD */
|
1539
1541
|
#line 239 "frt_q_parser.y"
|
1540
1542
|
{ T if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); E }
|
1541
|
-
#line
|
1543
|
+
#line 1544 "frt_q_parser.c"
|
1542
1544
|
break;
|
1543
1545
|
|
1544
1546
|
case 14: /* q: '(' ')' */
|
1545
1547
|
#line 242 "frt_q_parser.y"
|
1546
1548
|
{ T (yyval.query) = frt_bq_new_max(true, qp->max_clauses); E }
|
1547
|
-
#line
|
1549
|
+
#line 1550 "frt_q_parser.c"
|
1548
1550
|
break;
|
1549
1551
|
|
1550
1552
|
case 15: /* q: '(' bool_clss ')' */
|
1551
1553
|
#line 243 "frt_q_parser.y"
|
1552
1554
|
{ T (yyval.query) = get_bool_q((yyvsp[-1].bclss)); E }
|
1553
|
-
#line
|
1555
|
+
#line 1556 "frt_q_parser.c"
|
1554
1556
|
break;
|
1555
1557
|
|
1556
1558
|
case 20: /* term_q: QWRD */
|
1557
1559
|
#line 249 "frt_q_parser.y"
|
1558
|
-
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); Y}
|
1559
|
-
#line
|
1560
|
+
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str), encoding)); Y}
|
1561
|
+
#line 1562 "frt_q_parser.c"
|
1560
1562
|
break;
|
1561
1563
|
|
1562
1564
|
case 21: /* term_q: QWRD '~' QWRD */
|
1563
1565
|
#line 250 "frt_q_parser.y"
|
1564
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); Y}
|
1565
|
-
#line
|
1566
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str), encoding)); Y}
|
1567
|
+
#line 1568 "frt_q_parser.c"
|
1566
1568
|
break;
|
1567
1569
|
|
1568
1570
|
case 22: /* term_q: QWRD '~' */
|
1569
1571
|
#line 251 "frt_q_parser.y"
|
1570
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); Y}
|
1571
|
-
#line
|
1572
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL, encoding)); Y}
|
1573
|
+
#line 1574 "frt_q_parser.c"
|
1572
1574
|
break;
|
1573
1575
|
|
1574
1576
|
case 23: /* wild_q: WILD_STR */
|
1575
1577
|
#line 253 "frt_q_parser.y"
|
1576
|
-
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); Y}
|
1577
|
-
#line
|
1578
|
+
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str), encoding)); Y}
|
1579
|
+
#line 1580 "frt_q_parser.c"
|
1578
1580
|
break;
|
1579
1581
|
|
1580
1582
|
case 24: /* $@1: %empty */
|
1581
1583
|
#line 255 "frt_q_parser.y"
|
1582
1584
|
{ qp_pop_fields(qp); }
|
1583
|
-
#line
|
1585
|
+
#line 1586 "frt_q_parser.c"
|
1584
1586
|
break;
|
1585
1587
|
|
1586
1588
|
case 25: /* field_q: field ':' q $@1 */
|
1587
1589
|
#line 256 "frt_q_parser.y"
|
1588
1590
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1589
|
-
#line
|
1591
|
+
#line 1592 "frt_q_parser.c"
|
1590
1592
|
break;
|
1591
1593
|
|
1592
1594
|
case 26: /* $@2: %empty */
|
1593
1595
|
#line 257 "frt_q_parser.y"
|
1594
1596
|
{ qp_push_fields(qp, qp->all_fields, false); }
|
1595
|
-
#line
|
1597
|
+
#line 1598 "frt_q_parser.c"
|
1596
1598
|
break;
|
1597
1599
|
|
1598
1600
|
case 27: /* $@3: %empty */
|
1599
1601
|
#line 257 "frt_q_parser.y"
|
1600
1602
|
{ qp_pop_fields(qp); }
|
1601
|
-
#line
|
1603
|
+
#line 1604 "frt_q_parser.c"
|
1602
1604
|
break;
|
1603
1605
|
|
1604
1606
|
case 28: /* field_q: '*' $@2 ':' q $@3 */
|
1605
1607
|
#line 258 "frt_q_parser.y"
|
1606
1608
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1607
|
-
#line
|
1609
|
+
#line 1610 "frt_q_parser.c"
|
1608
1610
|
break;
|
1609
1611
|
|
1610
1612
|
case 29: /* field: QWRD */
|
1611
1613
|
#line 260 "frt_q_parser.y"
|
1612
1614
|
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1613
|
-
#line
|
1615
|
+
#line 1616 "frt_q_parser.c"
|
1614
1616
|
break;
|
1615
1617
|
|
1616
1618
|
case 30: /* field: field '|' QWRD */
|
1617
1619
|
#line 261 "frt_q_parser.y"
|
1618
1620
|
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1619
|
-
#line
|
1621
|
+
#line 1622 "frt_q_parser.c"
|
1620
1622
|
break;
|
1621
1623
|
|
1622
1624
|
case 31: /* phrase_q: '"' ph_words '"' */
|
1623
1625
|
#line 263 "frt_q_parser.y"
|
1624
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1625
|
-
#line
|
1626
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL, encoding); }
|
1627
|
+
#line 1628 "frt_q_parser.c"
|
1626
1628
|
break;
|
1627
1629
|
|
1628
1630
|
case 32: /* phrase_q: '"' ph_words '"' '~' QWRD */
|
1629
1631
|
#line 264 "frt_q_parser.y"
|
1630
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1631
|
-
#line
|
1632
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str), encoding); }
|
1633
|
+
#line 1634 "frt_q_parser.c"
|
1632
1634
|
break;
|
1633
1635
|
|
1634
1636
|
case 33: /* phrase_q: '"' '"' */
|
1635
1637
|
#line 265 "frt_q_parser.y"
|
1636
1638
|
{ (yyval.query) = NULL; }
|
1637
|
-
#line
|
1639
|
+
#line 1640 "frt_q_parser.c"
|
1638
1640
|
break;
|
1639
1641
|
|
1640
1642
|
case 34: /* phrase_q: '"' '"' '~' QWRD */
|
1641
1643
|
#line 266 "frt_q_parser.y"
|
1642
1644
|
{ (yyval.query) = NULL; (void)(yyvsp[0].str);}
|
1643
|
-
#line
|
1645
|
+
#line 1646 "frt_q_parser.c"
|
1644
1646
|
break;
|
1645
1647
|
|
1646
1648
|
case 35: /* ph_words: QWRD */
|
1647
1649
|
#line 268 "frt_q_parser.y"
|
1648
1650
|
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1649
|
-
#line
|
1651
|
+
#line 1652 "frt_q_parser.c"
|
1650
1652
|
break;
|
1651
1653
|
|
1652
1654
|
case 36: /* ph_words: '<' '>' */
|
1653
1655
|
#line 269 "frt_q_parser.y"
|
1654
1656
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1655
|
-
#line
|
1657
|
+
#line 1658 "frt_q_parser.c"
|
1656
1658
|
break;
|
1657
1659
|
|
1658
1660
|
case 37: /* ph_words: ph_words QWRD */
|
1659
1661
|
#line 270 "frt_q_parser.y"
|
1660
1662
|
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1661
|
-
#line
|
1663
|
+
#line 1664 "frt_q_parser.c"
|
1662
1664
|
break;
|
1663
1665
|
|
1664
1666
|
case 38: /* ph_words: ph_words '<' '>' */
|
1665
1667
|
#line 271 "frt_q_parser.y"
|
1666
1668
|
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1667
|
-
#line
|
1669
|
+
#line 1670 "frt_q_parser.c"
|
1668
1670
|
break;
|
1669
1671
|
|
1670
1672
|
case 39: /* ph_words: ph_words '|' QWRD */
|
1671
1673
|
#line 272 "frt_q_parser.y"
|
1672
1674
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1673
|
-
#line
|
1675
|
+
#line 1676 "frt_q_parser.c"
|
1674
1676
|
break;
|
1675
1677
|
|
1676
1678
|
case 40: /* range_q: '[' QWRD QWRD ']' */
|
1677
1679
|
#line 274 "frt_q_parser.y"
|
1678
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); Y}
|
1679
|
-
#line
|
1680
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, true, encoding)); Y}
|
1681
|
+
#line 1682 "frt_q_parser.c"
|
1680
1682
|
break;
|
1681
1683
|
|
1682
1684
|
case 41: /* range_q: '[' QWRD QWRD '}' */
|
1683
1685
|
#line 275 "frt_q_parser.y"
|
1684
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); Y}
|
1685
|
-
#line
|
1686
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, false, encoding)); Y}
|
1687
|
+
#line 1688 "frt_q_parser.c"
|
1686
1688
|
break;
|
1687
1689
|
|
1688
1690
|
case 42: /* range_q: '{' QWRD QWRD ']' */
|
1689
1691
|
#line 276 "frt_q_parser.y"
|
1690
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); Y}
|
1691
|
-
#line
|
1692
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, true, encoding)); Y}
|
1693
|
+
#line 1694 "frt_q_parser.c"
|
1692
1694
|
break;
|
1693
1695
|
|
1694
1696
|
case 43: /* range_q: '{' QWRD QWRD '}' */
|
1695
1697
|
#line 277 "frt_q_parser.y"
|
1696
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); Y}
|
1697
|
-
#line
|
1698
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, false, encoding)); Y}
|
1699
|
+
#line 1700 "frt_q_parser.c"
|
1698
1700
|
break;
|
1699
1701
|
|
1700
1702
|
case 44: /* range_q: '<' QWRD '}' */
|
1701
1703
|
#line 278 "frt_q_parser.y"
|
1702
|
-
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, false)); Y}
|
1703
|
-
#line
|
1704
|
+
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, false, encoding)); Y}
|
1705
|
+
#line 1706 "frt_q_parser.c"
|
1704
1706
|
break;
|
1705
1707
|
|
1706
1708
|
case 45: /* range_q: '<' QWRD ']' */
|
1707
1709
|
#line 279 "frt_q_parser.y"
|
1708
|
-
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, true)); Y}
|
1709
|
-
#line
|
1710
|
+
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, true, encoding)); Y}
|
1711
|
+
#line 1712 "frt_q_parser.c"
|
1710
1712
|
break;
|
1711
1713
|
|
1712
1714
|
case 46: /* range_q: '[' QWRD '>' */
|
1713
1715
|
#line 280 "frt_q_parser.y"
|
1714
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,true, false)); Y}
|
1715
|
-
#line
|
1716
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,true, false, encoding)); Y}
|
1717
|
+
#line 1718 "frt_q_parser.c"
|
1716
1718
|
break;
|
1717
1719
|
|
1718
1720
|
case 47: /* range_q: '{' QWRD '>' */
|
1719
1721
|
#line 281 "frt_q_parser.y"
|
1720
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,false, false)); Y}
|
1721
|
-
#line
|
1722
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,false, false, encoding)); Y}
|
1723
|
+
#line 1724 "frt_q_parser.c"
|
1722
1724
|
break;
|
1723
1725
|
|
1724
1726
|
case 48: /* range_q: '<' QWRD */
|
1725
1727
|
#line 282 "frt_q_parser.y"
|
1726
|
-
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, false)); Y}
|
1727
|
-
#line
|
1728
|
+
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, false, encoding)); Y}
|
1729
|
+
#line 1730 "frt_q_parser.c"
|
1728
1730
|
break;
|
1729
1731
|
|
1730
1732
|
case 49: /* range_q: '<' '=' QWRD */
|
1731
1733
|
#line 283 "frt_q_parser.y"
|
1732
|
-
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, true)); Y}
|
1733
|
-
#line
|
1734
|
+
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, true, encoding)); Y}
|
1735
|
+
#line 1736 "frt_q_parser.c"
|
1734
1736
|
break;
|
1735
1737
|
|
1736
1738
|
case 50: /* range_q: '>' '=' QWRD */
|
1737
1739
|
#line 284 "frt_q_parser.y"
|
1738
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,true, false)); Y}
|
1739
|
-
#line
|
1740
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,true, false, encoding)); Y}
|
1741
|
+
#line 1742 "frt_q_parser.c"
|
1740
1742
|
break;
|
1741
1743
|
|
1742
1744
|
case 51: /* range_q: '>' QWRD */
|
1743
1745
|
#line 285 "frt_q_parser.y"
|
1744
|
-
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,false, false)); Y}
|
1745
|
-
#line
|
1746
|
+
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,false, false, encoding)); Y}
|
1747
|
+
#line 1748 "frt_q_parser.c"
|
1746
1748
|
break;
|
1747
1749
|
|
1748
1750
|
|
1749
|
-
#line
|
1751
|
+
#line 1752 "frt_q_parser.c"
|
1750
1752
|
|
1751
1753
|
default: break;
|
1752
1754
|
}
|
@@ -1793,7 +1795,7 @@ yyerrlab:
|
|
1793
1795
|
if (!yyerrstatus)
|
1794
1796
|
{
|
1795
1797
|
++yynerrs;
|
1796
|
-
yyerror (qp, YY_("syntax error"));
|
1798
|
+
yyerror (qp, encoding, YY_("syntax error"));
|
1797
1799
|
}
|
1798
1800
|
|
1799
1801
|
if (yyerrstatus == 3)
|
@@ -1810,7 +1812,7 @@ yyerrlab:
|
|
1810
1812
|
else
|
1811
1813
|
{
|
1812
1814
|
yydestruct ("Error: discarding",
|
1813
|
-
yytoken, &yylval, qp);
|
1815
|
+
yytoken, &yylval, qp, encoding);
|
1814
1816
|
yychar = YYEMPTY;
|
1815
1817
|
}
|
1816
1818
|
}
|
@@ -1866,7 +1868,7 @@ yyerrlab1:
|
|
1866
1868
|
|
1867
1869
|
|
1868
1870
|
yydestruct ("Error: popping",
|
1869
|
-
YY_ACCESSING_SYMBOL (yystate), yyvsp, qp);
|
1871
|
+
YY_ACCESSING_SYMBOL (yystate), yyvsp, qp, encoding);
|
1870
1872
|
YYPOPSTACK (1);
|
1871
1873
|
yystate = *yyssp;
|
1872
1874
|
YY_STACK_PRINT (yyss, yyssp);
|
@@ -1904,7 +1906,7 @@ yyabortlab:
|
|
1904
1906
|
| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. |
|
1905
1907
|
`-----------------------------------------------------------*/
|
1906
1908
|
yyexhaustedlab:
|
1907
|
-
yyerror (qp, YY_("memory exhausted"));
|
1909
|
+
yyerror (qp, encoding, YY_("memory exhausted"));
|
1908
1910
|
yyresult = 2;
|
1909
1911
|
goto yyreturnlab;
|
1910
1912
|
|
@@ -1919,7 +1921,7 @@ yyreturnlab:
|
|
1919
1921
|
user semantic actions for why this is necessary. */
|
1920
1922
|
yytoken = YYTRANSLATE (yychar);
|
1921
1923
|
yydestruct ("Cleanup: discarding lookahead",
|
1922
|
-
yytoken, &yylval, qp);
|
1924
|
+
yytoken, &yylval, qp, encoding);
|
1923
1925
|
}
|
1924
1926
|
/* Do not reclaim the symbols of the rule whose action triggered
|
1925
1927
|
this YYABORT or YYACCEPT. */
|
@@ -1928,7 +1930,7 @@ yyreturnlab:
|
|
1928
1930
|
while (yyssp != yyss)
|
1929
1931
|
{
|
1930
1932
|
yydestruct ("Cleanup: popping",
|
1931
|
-
YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, qp);
|
1933
|
+
YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, qp, encoding);
|
1932
1934
|
YYPOPSTACK (1);
|
1933
1935
|
}
|
1934
1936
|
#ifndef yyoverflow
|
@@ -2102,8 +2104,9 @@ static int yylex(YYSTYPE *lvalp, FrtQParser *qp)
|
|
2102
2104
|
* It is responsible for clearing any memory that was allocated during the
|
2103
2105
|
* parsing process.
|
2104
2106
|
*/
|
2105
|
-
static int yyerror(FrtQParser *qp, char const *msg)
|
2107
|
+
static int yyerror(FrtQParser *qp, rb_encoding *encoding, char const *msg)
|
2106
2108
|
{
|
2109
|
+
(void)encoding;
|
2107
2110
|
qp->destruct = true;
|
2108
2111
|
if (!qp->handle_parse_errors) {
|
2109
2112
|
char buf[1024];
|
@@ -2133,22 +2136,21 @@ static int yyerror(FrtQParser *qp, char const *msg)
|
|
2133
2136
|
* This method returns the query parser for a particular field and sets it up
|
2134
2137
|
* with the text to be tokenized.
|
2135
2138
|
*/
|
2136
|
-
static FrtTokenStream *get_cached_ts(FrtQParser *qp,
|
2137
|
-
{
|
2139
|
+
static FrtTokenStream *get_cached_ts(FrtQParser *qp, ID field, char *text, rb_encoding *encoding) {
|
2138
2140
|
FrtTokenStream *ts;
|
2139
2141
|
if (frt_hs_exists(qp->tokenized_fields, (void *)field)) {
|
2140
2142
|
ts = (FrtTokenStream *)frt_h_get(qp->ts_cache, (void *)field);
|
2141
2143
|
if (!ts) {
|
2142
|
-
ts = frt_a_get_ts(qp->analyzer, field, text);
|
2144
|
+
ts = frt_a_get_ts(qp->analyzer, field, text, encoding);
|
2143
2145
|
frt_h_set(qp->ts_cache, (void *)field, ts);
|
2144
2146
|
}
|
2145
2147
|
else {
|
2146
|
-
ts->reset(ts, text);
|
2148
|
+
ts->reset(ts, text, encoding);
|
2147
2149
|
}
|
2148
2150
|
}
|
2149
2151
|
else {
|
2150
2152
|
ts = qp->non_tokenizer;
|
2151
|
-
ts->reset(ts, text);
|
2153
|
+
ts->reset(ts, text, encoding);
|
2152
2154
|
}
|
2153
2155
|
return ts;
|
2154
2156
|
}
|
@@ -2305,11 +2307,10 @@ static FrtBooleanClause *get_bool_cls(FrtQuery *q, FrtBCType occur)
|
|
2305
2307
|
* what we want as it will match any documents containing the same email
|
2306
2308
|
* address and tokenized with the same tokenizer.
|
2307
2309
|
*/
|
2308
|
-
static FrtQuery *get_term_q(FrtQParser *qp,
|
2309
|
-
{
|
2310
|
+
static FrtQuery *get_term_q(FrtQParser *qp, ID field, char *word, rb_encoding *encoding) {
|
2310
2311
|
FrtQuery *q;
|
2311
2312
|
FrtToken *token;
|
2312
|
-
FrtTokenStream *stream = get_cached_ts(qp, field, word);
|
2313
|
+
FrtTokenStream *stream = get_cached_ts(qp, field, word, encoding);
|
2313
2314
|
|
2314
2315
|
if ((token = frt_ts_next(stream)) == NULL) {
|
2315
2316
|
q = NULL;
|
@@ -2343,11 +2344,10 @@ static FrtQuery *get_term_q(FrtQParser *qp, FrtSymbol field, char *word)
|
|
2343
2344
|
* will be used. If there are any more tokens after tokenization, they will be
|
2344
2345
|
* ignored.
|
2345
2346
|
*/
|
2346
|
-
static FrtQuery *get_fuzzy_q(FrtQParser *qp,
|
2347
|
-
{
|
2347
|
+
static FrtQuery *get_fuzzy_q(FrtQParser *qp, ID field, char *word, char *slop_str, rb_encoding *encoding) {
|
2348
2348
|
FrtQuery *q;
|
2349
2349
|
FrtToken *token;
|
2350
|
-
FrtTokenStream *stream = get_cached_ts(qp, field, word);
|
2350
|
+
FrtTokenStream *stream = get_cached_ts(qp, field, word, encoding);
|
2351
2351
|
|
2352
2352
|
if ((token = frt_ts_next(stream)) == NULL) {
|
2353
2353
|
q = NULL;
|
@@ -2365,31 +2365,20 @@ static FrtQuery *get_fuzzy_q(FrtQParser *qp, FrtSymbol field, char *word, char *
|
|
2365
2365
|
}
|
2366
2366
|
|
2367
2367
|
/**
|
2368
|
-
* Downcase a string taking
|
2369
|
-
* character sets.
|
2368
|
+
* Downcases a string, taking its encoding into account; works for multibyte character sets.
|
2370
2369
|
*/
|
2371
|
-
static char *lower_str(char *str)
|
2372
|
-
|
2373
|
-
const int max_len =
|
2374
|
-
|
2375
|
-
|
2376
|
-
|
2377
|
-
|
2378
|
-
|
2379
|
-
|
2380
|
-
|
2381
|
-
|
2382
|
-
|
2383
|
-
}
|
2384
|
-
else {
|
2385
|
-
char *s = str;
|
2386
|
-
while (*s) {
|
2387
|
-
*s = tolower(*s);
|
2388
|
-
s++;
|
2389
|
-
}
|
2390
|
-
}
|
2391
|
-
free(wstr);
|
2392
|
-
str[max_len] = '\0';
|
2370
|
+
static char *lower_str(char *str, int len, rb_encoding *enc) {
|
2371
|
+
OnigCaseFoldType fold_type = ONIGENC_CASE_DOWNCASE;
|
2372
|
+
const int max_len = len + 20; // CASE_MAPPING_ADDITIONAL_LENGTH
|
2373
|
+
char *buf = FRT_ALLOC_N(char, max_len);
|
2374
|
+
char *buf_end = buf + max_len + 19;
|
2375
|
+
const OnigUChar *t = (const OnigUChar *)str;
|
2376
|
+
|
2377
|
+
len = enc->case_map(&fold_type, &t, (const OnigUChar *)(str + len), (OnigUChar *)buf, (OnigUChar *)buf_end, enc);
|
2378
|
+
memcpy(str, buf, len);
|
2379
|
+
str[len] = '\0';
|
2380
|
+
free(buf);
|
2381
|
+
|
2393
2382
|
return str;
|
2394
2383
|
}
|
2395
2384
|
|
@@ -2402,8 +2391,7 @@ static char *lower_str(char *str)
|
|
2402
2391
|
* optimized to a MatchAllQuery if the pattern is '*' or a PrefixQuery if the
|
2403
2392
|
* only wild char (*, ?) in the pattern is a '*' at the end of the pattern.
|
2404
2393
|
*/
|
2405
|
-
static FrtQuery *get_wild_q(FrtQParser *qp,
|
2406
|
-
{
|
2394
|
+
static FrtQuery *get_wild_q(FrtQParser *qp, ID field, char *pattern, rb_encoding *encoding) {
|
2407
2395
|
FrtQuery *q;
|
2408
2396
|
bool is_prefix = false;
|
2409
2397
|
char *p;
|
@@ -2411,7 +2399,7 @@ static FrtQuery *get_wild_q(FrtQParser *qp, FrtSymbol field, char *pattern)
|
|
2411
2399
|
|
2412
2400
|
if (qp->wild_lower
|
2413
2401
|
&& (!qp->tokenized_fields || frt_hs_exists(qp->tokenized_fields, (void *)field))) {
|
2414
|
-
lower_str(pattern);
|
2402
|
+
lower_str(pattern, len, encoding);
|
2415
2403
|
}
|
2416
2404
|
|
2417
2405
|
/* simplify the wildcard query to a prefix query if possible. Basically a
|
@@ -2446,9 +2434,8 @@ static FrtQuery *get_wild_q(FrtQParser *qp, FrtSymbol field, char *pattern)
|
|
2446
2434
|
/**
|
2447
2435
|
* Adds another field to the top of the FieldStack.
|
2448
2436
|
*/
|
2449
|
-
static FrtHashSet *add_field(FrtQParser *qp, const char *field_name)
|
2450
|
-
|
2451
|
-
FrtSymbol field = rb_intern(field_name);
|
2437
|
+
static FrtHashSet *add_field(FrtQParser *qp, const char *field_name) {
|
2438
|
+
ID field = rb_intern(field_name);
|
2452
2439
|
if (qp->allow_any_fields || frt_hs_exists(qp->all_fields, (void *)field)) {
|
2453
2440
|
frt_hs_add(qp->fields, (void *)field);
|
2454
2441
|
}
|
@@ -2574,8 +2561,7 @@ static Phrase *ph_add_multi_word(Phrase *self, char *word)
|
|
2574
2561
|
* This problem can easily be solved by using the StandardTokenizer or any
|
2575
2562
|
* custom tokenizer which will leave dbalmain@gmail.com as a single token.
|
2576
2563
|
*/
|
2577
|
-
static FrtQuery *get_phrase_query(FrtQParser *qp,
|
2578
|
-
{
|
2564
|
+
static FrtQuery *get_phrase_query(FrtQParser *qp, ID field, Phrase *phrase, char *slop_str, rb_encoding *encoding) {
|
2579
2565
|
const int pos_cnt = phrase->size;
|
2580
2566
|
FrtQuery *q = NULL;
|
2581
2567
|
|
@@ -2583,7 +2569,7 @@ static FrtQuery *get_phrase_query(FrtQParser *qp, FrtSymbol field, Phrase *phras
|
|
2583
2569
|
char **words = phrase->positions[0].terms;
|
2584
2570
|
const int word_count = frt_ary_size(words);
|
2585
2571
|
if (word_count == 1) {
|
2586
|
-
q = get_term_q(qp, field, words[0]);
|
2572
|
+
q = get_term_q(qp, field, words[0], encoding);
|
2587
2573
|
}
|
2588
2574
|
else {
|
2589
2575
|
int i;
|
@@ -2592,7 +2578,7 @@ static FrtQuery *get_phrase_query(FrtQParser *qp, FrtSymbol field, Phrase *phras
|
|
2592
2578
|
char *last_word = NULL;
|
2593
2579
|
|
2594
2580
|
for (i = 0; i < word_count; i++) {
|
2595
|
-
token = frt_ts_next(get_cached_ts(qp, field, words[i]));
|
2581
|
+
token = frt_ts_next(get_cached_ts(qp, field, words[i], encoding));
|
2596
2582
|
if (token) {
|
2597
2583
|
free(words[i]);
|
2598
2584
|
last_word = words[i] = frt_estrdup(token->text);
|
@@ -2644,7 +2630,7 @@ static FrtQuery *get_phrase_query(FrtQParser *qp, FrtSymbol field, Phrase *phras
|
|
2644
2630
|
pos_inc += phrase->positions[i].pos + 1; /* Actually holds pos_inc*/
|
2645
2631
|
|
2646
2632
|
if (word_count == 1) {
|
2647
|
-
stream = get_cached_ts(qp, field, words[0]);
|
2633
|
+
stream = get_cached_ts(qp, field, words[0], encoding);
|
2648
2634
|
while ((token = frt_ts_next(stream))) {
|
2649
2635
|
if (token->pos_inc) {
|
2650
2636
|
frt_phq_add_term(q, token->text,
|
@@ -2661,7 +2647,7 @@ static FrtQuery *get_phrase_query(FrtQParser *qp, FrtSymbol field, Phrase *phras
|
|
2661
2647
|
bool added_position = false;
|
2662
2648
|
|
2663
2649
|
for (j = 0; j < word_count; j++) {
|
2664
|
-
stream = get_cached_ts(qp, field, words[j]);
|
2650
|
+
stream = get_cached_ts(qp, field, words[j], encoding);
|
2665
2651
|
if ((token = frt_ts_next(stream))) {
|
2666
2652
|
if (!added_position) {
|
2667
2653
|
frt_phq_add_term(q, token->text,
|
@@ -2685,10 +2671,10 @@ static FrtQuery *get_phrase_query(FrtQParser *qp, FrtSymbol field, Phrase *phras
|
|
2685
2671
|
* the query parser as the all PhraseQuery didn't work well for this. Once the
|
2686
2672
|
* PhraseQuery has been built the Phrase object needs to be destroyed.
|
2687
2673
|
*/
|
2688
|
-
static FrtQuery *get_phrase_q(FrtQParser *qp, Phrase *phrase, char *slop_str)
|
2674
|
+
static FrtQuery *get_phrase_q(FrtQParser *qp, Phrase *phrase, char *slop_str, rb_encoding *encoding)
|
2689
2675
|
{
|
2690
2676
|
FrtQuery *volatile q = NULL;
|
2691
|
-
FLDS(q, get_phrase_query(qp, field, phrase, slop_str));
|
2677
|
+
FLDS(q, get_phrase_query(qp, field, phrase, slop_str, encoding));
|
2692
2678
|
ph_destroy(phrase);
|
2693
2679
|
return q;
|
2694
2680
|
}
|
@@ -2699,29 +2685,26 @@ static FrtQuery *get_phrase_q(FrtQParser *qp, Phrase *phrase, char *slop_str)
|
|
2699
2685
|
* Just like with WildCardQuery, RangeQuery needs to downcase its terms if the
|
2700
2686
|
* tokenizer also downcased its terms.
|
2701
2687
|
*/
|
2702
|
-
static FrtQuery *get_r_q(FrtQParser *qp,
|
2703
|
-
{
|
2688
|
+
static FrtQuery *get_r_q(FrtQParser *qp, ID field, char *from, char *to, bool inc_lower, bool inc_upper, rb_encoding *encoding) {
|
2704
2689
|
FrtQuery *rq;
|
2705
2690
|
if (qp->wild_lower
|
2706
2691
|
&& (!qp->tokenized_fields || frt_hs_exists(qp->tokenized_fields, (void *)field))) {
|
2707
|
-
if (from)
|
2708
|
-
lower_str(from);
|
2709
|
-
|
2710
|
-
|
2711
|
-
lower_str(to);
|
2712
|
-
}
|
2692
|
+
if (from)
|
2693
|
+
lower_str(from, strlen(from), encoding);
|
2694
|
+
if (to)
|
2695
|
+
lower_str(to, strlen(to), encoding);
|
2713
2696
|
}
|
2714
2697
|
/*
|
2715
2698
|
* terms don't get tokenized as it doesn't really make sense to do so for
|
2716
2699
|
* range queries.
|
2717
2700
|
|
2718
2701
|
if (from) {
|
2719
|
-
FrtTokenStream *stream = get_cached_ts(qp, field, from);
|
2702
|
+
FrtTokenStream *stream = get_cached_ts(qp, field, from, encoding);
|
2720
2703
|
FrtToken *token = frt_ts_next(stream);
|
2721
2704
|
from = token ? frt_estrdup(token->text) : NULL;
|
2722
2705
|
}
|
2723
2706
|
if (to) {
|
2724
|
-
FrtTokenStream *stream = get_cached_ts(qp, field, to);
|
2707
|
+
FrtTokenStream *stream = get_cached_ts(qp, field, to, encoding);
|
2725
2708
|
FrtToken *token = frt_ts_next(stream);
|
2726
2709
|
to = token ? frt_estrdup(token->text) : NULL;
|
2727
2710
|
}
|
@@ -2789,20 +2772,16 @@ void frt_qp_destroy(FrtQParser *self)
|
|
2789
2772
|
assert(NULL == self->fields_top);
|
2790
2773
|
|
2791
2774
|
frt_h_destroy(self->ts_cache);
|
2792
|
-
|
2775
|
+
frt_ts_deref(self->non_tokenizer);
|
2793
2776
|
frt_a_deref(self->analyzer);
|
2794
2777
|
free(self);
|
2795
2778
|
}
|
2796
2779
|
|
2797
|
-
|
2798
|
-
|
2799
|
-
|
2800
|
-
|
2801
|
-
*
|
2802
|
-
*/
|
2803
|
-
FrtQParser *frt_qp_new(FrtAnalyzer *analyzer)
|
2804
|
-
{
|
2805
|
-
FrtQParser *self = FRT_ALLOC(FrtQParser);
|
2780
|
+
FrtQParser *frt_qp_alloc() {
|
2781
|
+
return FRT_ALLOC(FrtQParser);
|
2782
|
+
}
|
2783
|
+
|
2784
|
+
FrtQParser *frt_qp_init(FrtQParser *self, FrtAnalyzer *analyzer) {
|
2806
2785
|
self->or_default = true;
|
2807
2786
|
self->wild_lower = true;
|
2808
2787
|
self->clean_str = false;
|
@@ -2830,8 +2809,18 @@ FrtQParser *frt_qp_new(FrtAnalyzer *analyzer)
|
|
2830
2809
|
return self;
|
2831
2810
|
}
|
2832
2811
|
|
2833
|
-
|
2834
|
-
|
2812
|
+
/**
|
2813
|
+
* Creates a new QueryParser setting all boolean parameters to their defaults.
|
2814
|
+
* If +def_fields+ is NULL then +all_fields+ is used in place of +def_fields+.
|
2815
|
+
* Note also that this method ensures that all fields that exist in
|
2816
|
+
* +def_fields+ must also exist in +all_fields+. This should make sense.
|
2817
|
+
*/
|
2818
|
+
FrtQParser *frt_qp_new(FrtAnalyzer *analyzer) {
|
2819
|
+
FrtQParser *self = frt_qp_alloc();
|
2820
|
+
return frt_qp_init(self, analyzer);
|
2821
|
+
}
|
2822
|
+
|
2823
|
+
void frt_qp_add_field(FrtQParser *self, ID field, bool is_default, bool is_tokenized) {
|
2835
2824
|
frt_hs_add(self->all_fields, (void *)field);
|
2836
2825
|
if (is_default) {
|
2837
2826
|
frt_hs_add(self->def_fields, (void *)field);
|
@@ -2961,12 +2950,12 @@ char *frt_qp_clean_str(char *str)
|
|
2961
2950
|
* analyzer. It then turns these tokens (if any) into a boolean query. If it
|
2962
2951
|
* fails to find any tokens, this method will return NULL.
|
2963
2952
|
*/
|
2964
|
-
static FrtQuery *qp_get_bad_query(FrtQParser *qp, char *str)
|
2953
|
+
static FrtQuery *qp_get_bad_query(FrtQParser *qp, char *str, rb_encoding *encoding)
|
2965
2954
|
{
|
2966
2955
|
FrtQuery *volatile q = NULL;
|
2967
2956
|
qp->recovering = true;
|
2968
2957
|
assert(qp->fields_top->next == NULL);
|
2969
|
-
FLDS(q, get_term_q(qp, field, str));
|
2958
|
+
FLDS(q, get_term_q(qp, field, str, encoding));
|
2970
2959
|
return q;
|
2971
2960
|
}
|
2972
2961
|
|
@@ -2978,40 +2967,63 @@ static FrtQuery *qp_get_bad_query(FrtQParser *qp, char *str)
|
|
2978
2967
|
* and turns them into a boolean query on the default fields.
|
2979
2968
|
*/
|
2980
2969
|
|
2981
|
-
FrtQuery *qp_parse(FrtQParser *self, char *
|
2970
|
+
FrtQuery *qp_parse(FrtQParser *self, char *query_string, rb_encoding *encoding)
|
2982
2971
|
{
|
2983
2972
|
FrtQuery *result = NULL;
|
2973
|
+
char *qstr;
|
2974
|
+
unsigned char *dp_start = NULL;
|
2975
|
+
|
2984
2976
|
frt_mutex_lock(&self->mutex);
|
2985
2977
|
/* if qp->fields_top->next is not NULL we have a left over field-stack
|
2986
2978
|
* object that was not popped during the last query parse */
|
2987
2979
|
assert(NULL == self->fields_top->next);
|
2988
2980
|
|
2981
|
+
/* encode query_string to utf8 for further processing unless it is already utf8 encoded */
|
2982
|
+
if (encoding == utf8_encoding) {
|
2983
|
+
qstr = query_string;
|
2984
|
+
} else {
|
2985
|
+
/* assume query is sbc encoded and encoding to utf results in maximum utf mbc expansion */
|
2986
|
+
const unsigned char *sp = (unsigned char *)query_string;
|
2987
|
+
int query_string_len = strlen(query_string);
|
2988
|
+
int dp_length = query_string_len * utf8_mbmaxlen + 1;
|
2989
|
+
unsigned char *dp = FRT_ALLOC_N(unsigned char, dp_length);
|
2990
|
+
dp_start = dp;
|
2991
|
+
rb_econv_t *ec = rb_econv_open(rb_enc_name(encoding), rb_enc_name(utf8_encoding), RUBY_ECONV_INVALID_REPLACE);
|
2992
|
+
assert(ec != NULL);
|
2993
|
+
rb_econv_convert(ec, &sp, (unsigned char *)query_string + query_string_len, &dp, (unsigned char *)dp + dp_length - 1, 0);
|
2994
|
+
rb_econv_close(ec);
|
2995
|
+
*dp = '\0';
|
2996
|
+
qstr = (char *)dp_start;
|
2997
|
+
}
|
2998
|
+
|
2989
2999
|
self->recovering = self->destruct = false;
|
3000
|
+
|
2990
3001
|
if (self->clean_str) {
|
2991
3002
|
self->qstrp = self->qstr = frt_qp_clean_str(qstr);
|
2992
|
-
}
|
2993
|
-
else {
|
3003
|
+
} else {
|
2994
3004
|
self->qstrp = self->qstr = qstr;
|
2995
3005
|
}
|
2996
3006
|
self->fields = self->def_fields;
|
2997
3007
|
self->result = NULL;
|
2998
3008
|
|
2999
|
-
if (0 == yyparse(self))
|
3009
|
+
if (0 == yyparse(self, encoding))
|
3000
3010
|
result = self->result;
|
3001
|
-
|
3011
|
+
|
3002
3012
|
if (!result && self->handle_parse_errors) {
|
3003
3013
|
self->destruct = false;
|
3004
|
-
result = qp_get_bad_query(self, self->qstr);
|
3014
|
+
result = qp_get_bad_query(self, self->qstr, encoding);
|
3005
3015
|
}
|
3006
|
-
if (self->destruct && !self->handle_parse_errors)
|
3016
|
+
if (self->destruct && !self->handle_parse_errors)
|
3007
3017
|
FRT_RAISE(FRT_PARSE_ERROR, frt_xmsg_buffer);
|
3008
|
-
|
3009
|
-
if (!result)
|
3018
|
+
|
3019
|
+
if (!result)
|
3010
3020
|
result = frt_bq_new(false);
|
3011
|
-
|
3012
|
-
if (self->clean_str)
|
3021
|
+
|
3022
|
+
if (self->clean_str)
|
3013
3023
|
free(self->qstr);
|
3014
|
-
|
3024
|
+
if (dp_start)
|
3025
|
+
free(dp_start);
|
3026
|
+
|
3015
3027
|
frt_mutex_unlock(&self->mutex);
|
3016
3028
|
return result;
|
3017
3029
|
}
|