isomorfeus-ferret 0.12.7 → 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166)
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -13
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  11. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  47. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  48. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  49. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  50. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  51. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  52. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  53. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  54. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  55. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  56. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  57. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  58. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  59. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  60. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  61. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  62. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  63. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  64. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  66. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  67. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  68. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  69. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  70. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  72. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  73. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  74. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  76. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  78. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  80. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  81. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  82. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  83. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  84. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  85. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  86. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  87. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  88. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  89. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  90. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  91. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  92. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  93. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  94. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  95. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  96. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  97. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  98. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  99. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  100. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  101. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  102. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  103. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  104. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  105. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  106. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  107. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  109. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  110. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  111. data/lib/isomorfeus/ferret/version.rb +1 -1
  112. metadata +27 -57
  113. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  114. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  115. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  116. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  117. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  118. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  119. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  120. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  121. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  122. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  160. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  162. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  163. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  164. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  165. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  166. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -1,7 +1,9 @@
1
1
  #include "frt_search.h"
2
2
  #include "test.h"
3
3
 
4
- static FrtSymbol num;
4
+ #undef close
5
+
6
+ static ID num;
5
7
 
6
8
  extern void prepare_filter_index(FrtStore *store);
7
9
 
@@ -63,7 +65,7 @@ static void test_const_score_query_hash(TestCase *tc, void *data)
63
65
 
64
66
  TestSuite *ts_q_const_score(TestSuite *suite)
65
67
  {
66
- FrtStore *store = frt_open_ram_store();
68
+ FrtStore *store = frt_open_ram_store(NULL);
67
69
  FrtIndexReader *ir;
68
70
  FrtSearcher *searcher;
69
71
 
@@ -72,7 +74,7 @@ TestSuite *ts_q_const_score(TestSuite *suite)
72
74
  suite = ADD_SUITE(suite);
73
75
 
74
76
  prepare_filter_index(store);
75
- ir = frt_ir_open(store);
77
+ ir = frt_ir_open(NULL, store);
76
78
  searcher = frt_isea_new(ir);
77
79
 
78
80
  tst_run_test(suite, test_const_score_query, (void *)searcher);
@@ -1,7 +1,9 @@
1
1
  #include "frt_search.h"
2
2
  #include "test.h"
3
3
 
4
- static FrtSymbol num, flipflop;
4
+ #undef close
5
+
6
+ static ID num, flipflop;
5
7
 
6
8
  extern void prepare_filter_index(FrtStore *store);
7
9
 
@@ -40,7 +42,7 @@ static void test_filtered_query(TestCase *tc, void *data)
40
42
 
41
43
  TestSuite *ts_q_filtered(TestSuite *suite)
42
44
  {
43
- FrtStore *store = frt_open_ram_store();
45
+ FrtStore *store = frt_open_ram_store(NULL);
44
46
  FrtIndexReader *ir;
45
47
  FrtSearcher *searcher;
46
48
 
@@ -50,7 +52,7 @@ TestSuite *ts_q_filtered(TestSuite *suite)
50
52
  suite = ADD_SUITE(suite);
51
53
 
52
54
  prepare_filter_index(store);
53
- ir = frt_ir_open(store);
55
+ ir = frt_ir_open(NULL, store);
54
56
  searcher = frt_isea_new(ir);
55
57
 
56
58
  tst_run_test(suite, test_filtered_query, (void *)searcher);
@@ -1,19 +1,22 @@
1
1
  #include "frt_search.h"
2
2
  #include "test.h"
3
3
 
4
+ #undef close
5
+
4
6
  #define ARRAY_SIZE 20
5
7
 
6
- static FrtSymbol field;
8
+ static ID field;
7
9
 
8
10
  static void add_doc(const char *text, FrtIndexWriter *iw)
9
11
  {
10
12
  FrtDocument *doc = frt_doc_new();
11
- frt_doc_add_field(doc, frt_df_add_data(frt_df_new(field), (char *)text));
13
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
14
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(field), (char *)text, enc));
12
15
  frt_iw_add_doc(iw, doc);
13
16
  frt_doc_destroy(doc);
14
17
  }
15
18
 
16
- void check_to_s(TestCase *tc, FrtQuery *query, FrtSymbol field, const char *q_str);
19
+ void check_to_s(TestCase *tc, FrtQuery *query, ID field, const char *q_str);
17
20
 
18
21
  static void do_prefix_test(TestCase *tc, FrtSearcher *searcher, const char *qstr, const char *expected_hits, int pre_len, float min_sim)
19
22
  {
@@ -30,11 +33,11 @@ static void test_fuzziness(TestCase *tc, void *data)
30
33
  FrtSearcher *sea;
31
34
  FrtTopDocs *top_docs;
32
35
  FrtQuery *q;
33
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
36
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
34
37
  frt_index_create(store, fis);
35
38
  frt_fis_deref(fis);
36
39
 
37
- iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
40
+ iw = frt_iw_open(NULL, store, frt_whitespace_analyzer_new(false), NULL);
38
41
 
39
42
  add_doc("aaaaa", iw);
40
43
  add_doc("aaaab", iw);
@@ -47,7 +50,7 @@ static void test_fuzziness(TestCase *tc, void *data)
47
50
  add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw); /* test max_distances problem */
48
51
  frt_iw_close(iw);
49
52
 
50
- ir = frt_ir_open(store);
53
+ ir = frt_ir_open(NULL, store);
51
54
  sea = frt_isea_new(ir);
52
55
 
53
56
  q = frt_fuzq_new_conf(field, "aaaaa", 0.0, 5, 10);
@@ -117,16 +120,16 @@ static void test_fuzziness_long(TestCase *tc, void *data)
117
120
  FrtIndexReader *ir;
118
121
  FrtTopDocs *top_docs;
119
122
  FrtQuery *q;
120
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
123
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
121
124
  frt_index_create(store, fis);
122
125
  frt_fis_deref(fis);
123
126
 
124
- iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
127
+ iw = frt_iw_open(NULL, store, frt_whitespace_analyzer_new(false), NULL);
125
128
 
126
129
  add_doc("aaaaaaa", iw);
127
130
  add_doc("segment", iw);
128
131
  frt_iw_close(iw);
129
- ir = frt_ir_open(store);
132
+ ir = frt_ir_open(NULL, store);
130
133
  sea = frt_isea_new(ir);
131
134
 
132
135
  /* not similar enough: */
@@ -225,7 +228,7 @@ static void test_fuzzy_query_hash(TestCase *tc, void *data)
225
228
 
226
229
  TestSuite *ts_q_fuzzy(TestSuite *suite)
227
230
  {
228
- FrtStore *store = frt_open_ram_store();
231
+ FrtStore *store = frt_open_ram_store(NULL);
229
232
 
230
233
  field = rb_intern("field");
231
234
 
@@ -1,13 +1,15 @@
1
1
  #include "frt_search.h"
2
2
  #include "test.h"
3
3
 
4
+ extern rb_encoding *utf8_encoding;
5
+
4
6
  typedef struct QPTestPair {
5
7
  const char *qstr;
6
8
  const char *qres;
7
9
  } QPTestPair;
8
10
 
9
11
  #define PARSER_TEST(str, res) do {\
10
- FrtQuery *q = qp_parse(parser, (char *)str);\
12
+ FrtQuery *q = qp_parse(parser, (char *)str, enc);\
11
13
  char *qres = q->to_s(q, rb_intern("xx"));\
12
14
  Asequal(res, qres);\
13
15
  frt_q_deref(q);\
@@ -141,7 +143,7 @@ static void test_q_parser(TestCase *tc, void *data)
141
143
  {"f1:(aaa f2:bbb ccc)", "f1:aaa f2:bbb f1:ccc"}
142
144
  };
143
145
  (void)data;
144
-
146
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
145
147
  FRT_REF(analyzer);
146
148
  parser = frt_qp_new(analyzer);
147
149
  frt_qp_add_field(parser, rb_intern("xx"), true, true);
@@ -186,7 +188,7 @@ static void test_q_parser(TestCase *tc, void *data)
186
188
  static void test_q_parser_standard_analyzer(TestCase *tc, void *data)
187
189
  {
188
190
  int i;
189
- FrtAnalyzer *analyzer = frt_mb_standard_analyzer_new(true);
191
+ FrtAnalyzer *analyzer = frt_standard_analyzer_new(true);
190
192
  FrtQParser *parser;
191
193
  QPTestPair pairs[] = {
192
194
  {"", ""},
@@ -307,6 +309,7 @@ static void test_q_parser_standard_analyzer(TestCase *tc, void *data)
307
309
  */
308
310
  };
309
311
  (void)data;
312
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
310
313
 
311
314
  FRT_REF(analyzer);
312
315
  parser = frt_qp_new(analyzer);
@@ -390,6 +393,38 @@ static void test_qp_bad_queries(TestCase *tc, void *data)
390
393
  {"::|)*&one)(*two(*&\"", "\"one two\"~1"}
391
394
  };
392
395
  (void)data;
396
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
397
+
398
+ parser = frt_qp_new(frt_letter_analyzer_new(true));
399
+ frt_qp_add_field(parser, rb_intern("xx"), true, true);
400
+ frt_qp_add_field(parser, rb_intern("f1"), false, true);
401
+ frt_qp_add_field(parser, rb_intern("f2"), false, true);
402
+ frt_qp_add_field(parser, rb_intern("field"), false, true);
403
+
404
+ parser->handle_parse_errors = true;
405
+
406
+ for (i = 0; i < FRT_NELEMS(pairs); i++) {
407
+ PARSER_TEST(pairs[i].qstr, pairs[i].qres);
408
+ }
409
+ parser->clean_str = true;
410
+ for (i = 0; i < FRT_NELEMS(pairs); i++) {
411
+ PARSER_TEST(pairs[i].qstr, pairs[i].qres);
412
+ }
413
+ frt_qp_destroy(parser);
414
+ }
415
+
416
+ static void test_mb_qp_bad_queries(TestCase *tc, void *data)
417
+ {
418
+ int i;
419
+ FrtQParser *parser;
420
+ QPTestPair pairs[] = {
421
+ {"[, ]", ""},
422
+ {"::*word", "word"},
423
+ {"::))*&)(*^&*(", ""},
424
+ {"::|)*&one)(*two(*&\"", "\"one two\"~1"}
425
+ };
426
+ (void)data;
427
+ rb_encoding *enc = utf8_encoding;
393
428
 
394
429
  parser = frt_qp_new(frt_letter_analyzer_new(true));
395
430
  frt_qp_add_field(parser, rb_intern("xx"), true, true);
@@ -414,20 +449,21 @@ static void test_qp_prefix_query(TestCase *tc, void *data)
414
449
  FrtQParser *parser;
415
450
  FrtQuery *q;
416
451
  (void)data;
452
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
417
453
 
418
454
  parser = frt_qp_new(frt_letter_analyzer_new(true));
419
455
  frt_qp_add_field(parser, rb_intern("xx"), true, true);
420
456
 
421
- q = qp_parse(parser, (char *)"asdg*");
457
+ q = qp_parse(parser, (char *)"asdg*", enc);
422
458
  Aiequal(PREFIX_QUERY, q->type);
423
459
  frt_q_deref(q);
424
- q = qp_parse(parser, (char *)"a?dg*");
460
+ q = qp_parse(parser, (char *)"a?dg*", enc);
425
461
  Aiequal(WILD_CARD_QUERY, q->type);
426
462
  frt_q_deref(q);
427
- q = qp_parse(parser, (char *)"a*dg*");
463
+ q = qp_parse(parser, (char *)"a*dg*", enc);
428
464
  Aiequal(WILD_CARD_QUERY, q->type);
429
465
  frt_q_deref(q);
430
- q = qp_parse(parser, (char *)"asdg*a");
466
+ q = qp_parse(parser, (char *)"asdg*a", enc);
431
467
  Aiequal(WILD_CARD_QUERY, q->type);
432
468
  frt_q_deref(q);
433
469
  frt_qp_destroy(parser);
@@ -437,6 +473,7 @@ static void test_qp_keyword_switch(TestCase *tc, void *data)
437
473
  {
438
474
  FrtQParser *parser;
439
475
  (void)data;
476
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
440
477
 
441
478
  parser = frt_qp_new(frt_letter_analyzer_new(true));
442
479
  frt_qp_add_field(parser, rb_intern("xx"), true, true);
@@ -457,6 +494,7 @@ TestSuite *ts_q_parser(TestSuite *suite)
457
494
  tst_run_test(suite, test_q_parser_standard_analyzer, NULL);
458
495
  tst_run_test(suite, test_qp_clean_str, NULL);
459
496
  tst_run_test(suite, test_qp_bad_queries, NULL);
497
+ tst_run_test(suite, test_mb_qp_bad_queries, NULL);
460
498
  tst_run_test(suite, test_qp_prefix_query, NULL);
461
499
  tst_run_test(suite, test_qp_keyword_switch, NULL);
462
500
 
@@ -1,6 +1,8 @@
1
1
  #include "frt_search.h"
2
2
  #include "test.h"
3
3
 
4
+ #undef close
5
+
4
6
  #define ARRAY_SIZE 20
5
7
  #define TEST_SE(query, ir, expected) do { \
6
8
  FrtSpanEnum *__se = ((FrtSpanQuery *)query)->get_spans(query, ir); \
@@ -10,12 +12,13 @@
10
12
  free(__tmp); \
11
13
  } while(0)
12
14
 
13
- static FrtSymbol field;
15
+ static ID field;
14
16
 
15
17
  static void add_doc(const char *text, FrtIndexWriter *iw)
16
18
  {
17
19
  FrtDocument *doc = frt_doc_new();
18
- frt_doc_add_field(doc, frt_df_add_data(frt_df_new(field), (char *)text));
20
+ rb_encoding *enc = rb_enc_find("ASCII-8BIT");
21
+ frt_doc_add_field(doc, frt_df_add_data(frt_df_new(field), (char *)text, enc));
19
22
  frt_iw_add_doc(iw, doc);
20
23
  frt_doc_destroy(doc);
21
24
  }
@@ -25,7 +28,7 @@ static void span_test_setup(FrtStore *store)
25
28
  {
26
29
  const char **d;
27
30
  FrtIndexWriter *iw;
28
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
31
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
29
32
  const char *data[] = {
30
33
  "start finish one two three four five six seven",
31
34
  "start one finish two three four five six seven",
@@ -63,7 +66,7 @@ static void span_test_setup(FrtStore *store)
63
66
  frt_index_create(store, fis);
64
67
  frt_fis_deref(fis);
65
68
 
66
- iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
69
+ iw = frt_iw_open(NULL, store, frt_whitespace_analyzer_new(false), NULL);
67
70
 
68
71
  for (d = data; *d != NULL; d++) {
69
72
  add_doc(*d, iw);
@@ -78,7 +81,7 @@ static void test_span_term(TestCase *tc, void *data)
78
81
  FrtSearcher *sea;
79
82
  FrtQuery *tq;
80
83
 
81
- ir = frt_ir_open(store);
84
+ ir = frt_ir_open(NULL, store);
82
85
  sea = frt_isea_new(ir);
83
86
 
84
87
  tq = frt_spantq_new(rb_intern("notafield"), "nine");
@@ -132,7 +135,7 @@ static void test_span_multi_term(TestCase *tc, void *data)
132
135
  FrtSearcher *sea;
133
136
  FrtQuery *mtq;
134
137
 
135
- ir = frt_ir_open(store);
138
+ ir = frt_ir_open(NULL, store);
136
139
  sea = frt_isea_new(ir);
137
140
 
138
141
  mtq = frt_spanmtq_new(rb_intern("notafield"));
@@ -206,7 +209,7 @@ static void test_span_prefix(TestCase *tc, void *data)
206
209
  FrtQuery *prq;
207
210
  char *tmp;
208
211
 
209
- ir = frt_ir_open(store);
212
+ ir = frt_ir_open(NULL, store);
210
213
  sea = frt_isea_new(ir);
211
214
 
212
215
  prq = frt_spanprq_new(rb_intern("notafield"), "fl");
@@ -261,7 +264,7 @@ static void test_span_first(TestCase *tc, void *data)
261
264
  FrtSearcher *sea;
262
265
  FrtQuery *q;
263
266
 
264
- ir = frt_ir_open(store);
267
+ ir = frt_ir_open(NULL, store);
265
268
  sea = frt_isea_new(ir);
266
269
 
267
270
  q = frt_spanfq_new_nr(frt_spantq_new(field, "finish"), 1);
@@ -311,7 +314,7 @@ static void test_span_or(TestCase *tc, void *data)
311
314
  FrtSearcher *sea;
312
315
  FrtQuery *q;
313
316
 
314
- ir = frt_ir_open(store);
317
+ ir = frt_ir_open(NULL, store);
315
318
  sea = frt_isea_new(ir);
316
319
  q = frt_spanoq_new();
317
320
  tst_check_hits(tc, sea, q, "", -1);
@@ -365,7 +368,7 @@ static void test_span_near(TestCase *tc, void *data)
365
368
  FrtSearcher *sea;
366
369
  FrtQuery *q;
367
370
 
368
- ir = frt_ir_open(store);
371
+ ir = frt_ir_open(NULL, store);
369
372
  sea = frt_isea_new(ir);
370
373
 
371
374
  q = frt_spannq_new(0, true);
@@ -466,7 +469,7 @@ static void test_span_not(TestCase *tc, void *data)
466
469
  FrtSearcher *sea;
467
470
  FrtQuery *q, *nearq0, *nearq1;
468
471
 
469
- ir = frt_ir_open(store);
472
+ ir = frt_ir_open(NULL, store);
470
473
  sea = frt_isea_new(ir);
471
474
 
472
475
  nearq0 = frt_spannq_new(4, true);
@@ -551,7 +554,7 @@ static void test_span_not_hash(TestCase *tc, void *data)
551
554
 
552
555
  TestSuite *ts_q_span(TestSuite *suite)
553
556
  {
554
- FrtStore *store = frt_open_ram_store();
557
+ FrtStore *store = frt_open_ram_store(NULL);
555
558
  field = rb_intern("field");
556
559
  span_test_setup(store);
557
560
 
@@ -10,7 +10,7 @@ void test_write_to(TestCase *tc, void *data)
10
10
  {
11
11
  int i;
12
12
  char *tmp;
13
- FrtStore *ram_store = frt_open_ram_store();
13
+ FrtStore *ram_store = frt_open_ram_store(NULL);
14
14
  FrtStore *fs_store = frt_open_fs_store("./test/testdir/store");
15
15
  char str[18] = "³³ øãíøäÄ";
16
16
  char buf[18000] = "";
@@ -43,7 +43,7 @@ void test_write_to(TestCase *tc, void *data)
43
43
 
44
44
  Aiequal(17021, fs_store->length(fs_store, "_rw_funny_string.cfs"));
45
45
  frt_store_deref(ram_store);
46
- ram_store = frt_open_ram_store_and_copy(fs_store, false);
46
+ ram_store = frt_open_ram_store_and_copy(NULL, fs_store, false);
47
47
 
48
48
  istream = ram_store->open_input(ram_store, "_rw_funny_string.cfs");
49
49
  Asequal(str, tmp = frt_is_read_string(istream));
@@ -63,7 +63,7 @@ void test_write_to(TestCase *tc, void *data)
63
63
  */
64
64
  TestSuite *ts_ram_store(TestSuite *suite)
65
65
  {
66
- FrtStore *store = frt_open_ram_store();
66
+ FrtStore *store = frt_open_ram_store(NULL);
67
67
 
68
68
  suite = ADD_SUITE(suite);
69
69