isomorfeus-ferret 0.12.6 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -16
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  94. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  130. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  131. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  132. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  133. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  134. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  135. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  136. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  137. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  138. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  139. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  140. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  141. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  142. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  143. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  144. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  145. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  146. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  147. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  149. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  150. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  151. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  152. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  153. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  155. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  156. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  157. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  159. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  161. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  163. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  164. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  165. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  166. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  167. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  168. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  169. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  170. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  171. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  172. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  173. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  174. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  175. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  176. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  177. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  178. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  179. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  180. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  181. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  182. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  183. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  184. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  185. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  186. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  187. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  188. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  189. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  190. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  192. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  193. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  194. data/lib/isomorfeus/ferret/version.rb +1 -1
  195. metadata +113 -58
  196. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  197. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  198. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  199. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  200. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  201. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  202. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  203. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  204. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  205. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  243. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  245. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  246. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  247. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  248. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  249. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,328 @@
1
+ /*
2
+ xxHash - Extremely Fast Hash algorithm
3
+ Header File
4
+ Copyright (C) 2012-2016, Yann Collet.
5
+
6
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
+
8
+ Redistribution and use in source and binary forms, with or without
9
+ modification, are permitted provided that the following conditions are
10
+ met:
11
+
12
+ * Redistributions of source code must retain the above copyright
13
+ notice, this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above
15
+ copyright notice, this list of conditions and the following disclaimer
16
+ in the documentation and/or other materials provided with the
17
+ distribution.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ You can contact the author at :
32
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
33
+ */
34
+
35
+ /* Notice extracted from xxHash homepage :
36
+
37
+ xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38
+ It also successfully passes all tests from the SMHasher suite.
39
+
40
+ Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
+
42
+ Name Speed Q.Score Author
43
+ xxHash 5.4 GB/s 10
44
+ CrapWow 3.2 GB/s 2 Andrew
45
+ MurmurHash 3a 2.7 GB/s 10 Austin Appleby
46
+ SpookyHash 2.0 GB/s 10 Bob Jenkins
47
+ SBox 1.4 GB/s 9 Bret Mulvey
48
+ Lookup3 1.2 GB/s 9 Bob Jenkins
49
+ SuperFastHash 1.2 GB/s 1 Paul Hsieh
50
+ CityHash64 1.05 GB/s 10 Pike & Alakuijala
51
+ FNV 0.55 GB/s 5 Fowler, Noll, Vo
52
+ CRC32 0.43 GB/s 9
53
+ MD5-32 0.33 GB/s 10 Ronald L. Rivest
54
+ SHA1-32 0.28 GB/s 10
55
+
56
+ Q.Score is a measure of quality of the hash function.
57
+ It depends on successfully passing SMHasher test set.
58
+ 10 is a perfect score.
59
+
60
+ A 64-bit version, named XXH64, is available since r35.
61
+ It offers much better speed, but for 64-bit applications only.
62
+ Name Speed on 64 bits Speed on 32 bits
63
+ XXH64 13.8 GB/s 1.9 GB/s
64
+ XXH32 6.8 GB/s 6.0 GB/s
65
+ */
66
+
67
+ #ifndef XXHASH_H_5627135585666179
68
+ #define XXHASH_H_5627135585666179 1
69
+
70
+ #if defined (__cplusplus)
71
+ extern "C" {
72
+ #endif
73
+
74
+
75
+ /* ****************************
76
+ * Definitions
77
+ ******************************/
78
+ #include <stddef.h> /* size_t */
79
+ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
80
+
81
+
82
+ /* ****************************
83
+ * API modifier
84
+ ******************************/
85
+ /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
86
+ * This is useful to include xxhash functions in `static` mode
87
+ * in order to inline them, and remove their symbol from the public list.
88
+ * Inlining can offer dramatic performance improvement on small keys.
89
+ * Methodology :
90
+ * #define XXH_INLINE_ALL
91
+ * #include "lz4xxhash.h"
92
+ * `xxhash.c` is automatically included.
93
+ * It's not useful to compile and link it as a separate module.
94
+ */
95
+ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
96
+ # ifndef XXH_STATIC_LINKING_ONLY
97
+ # define XXH_STATIC_LINKING_ONLY
98
+ # endif
99
+ # if defined(__GNUC__)
100
+ # define XXH_PUBLIC_API static __inline __attribute__((unused))
101
+ # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
102
+ # define XXH_PUBLIC_API static inline
103
+ # elif defined(_MSC_VER)
104
+ # define XXH_PUBLIC_API static __inline
105
+ # else
106
+ /* this version may generate warnings for unused static functions */
107
+ # define XXH_PUBLIC_API static
108
+ # endif
109
+ #else
110
+ # define XXH_PUBLIC_API /* do nothing */
111
+ #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
112
+
113
+ /*! XXH_NAMESPACE, aka Namespace Emulation :
114
+ *
115
+ * If you want to include _and expose_ xxHash functions from within your own library,
116
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
117
+ *
118
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
119
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
120
+ *
121
+ * Note that no change is required within the calling program as long as it includes `lz4xxhash.h` :
122
+ * regular symbol name will be automatically translated by this header.
123
+ */
124
+ #ifdef XXH_NAMESPACE
125
+ # define XXH_CAT(A,B) A##B
126
+ # define XXH_NAME2(A,B) XXH_CAT(A,B)
127
+ # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
128
+ # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
129
+ # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
130
+ # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
131
+ # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
132
+ # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
133
+ # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
134
+ # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
135
+ # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
136
+ # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
137
+ # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
138
+ # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
139
+ # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
140
+ # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
141
+ # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
142
+ # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
143
+ # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
144
+ # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
145
+ # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
146
+ #endif
147
+
148
+
149
+ /* *************************************
150
+ * Version
151
+ ***************************************/
152
+ #define XXH_VERSION_MAJOR 0
153
+ #define XXH_VERSION_MINOR 6
154
+ #define XXH_VERSION_RELEASE 5
155
+ #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
156
+ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
157
+
158
+
159
+ /*-**********************************************************************
160
+ * 32-bit hash
161
+ ************************************************************************/
162
+ typedef unsigned int XXH32_hash_t;
163
+
164
+ /*! XXH32() :
165
+ Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
166
+ The memory between input & input+length must be valid (allocated and read-accessible).
167
+ "seed" can be used to alter the result predictably.
168
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
169
+ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
170
+
171
+ /*====== Streaming ======*/
172
+ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
173
+ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
174
+ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
175
+ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
176
+
177
+ XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
178
+ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
179
+ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
180
+
181
+ /*
182
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
183
+ * Note that, for small input, they are slower than single-call functions, due to state management.
184
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
185
+ *
186
+ * XXH state must first be allocated, using XXH*_createState() .
187
+ *
188
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
189
+ *
190
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
191
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
192
+ *
193
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
194
+ * This function returns the nn-bit hash as an unsigned int (XXH32) or unsigned long long (XXH64).
195
+ *
196
+ * It's still possible to continue inserting input into the hash state after a digest,
197
+ * and generate some new hashes later on, by calling again XXH*_digest().
198
+ *
199
+ * When done, free XXH state space if it was allocated dynamically.
200
+ */
201
+
202
+ /*====== Canonical representation ======*/
203
+
204
+ typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
205
+ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
206
+ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
207
+
208
+ /* The default result types for XXH functions are primitive unsigned 32-bit and 64-bit integers.
209
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
210
+ * These functions allow transformation of hash result into and from its canonical format.
211
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
212
+ */
213
+
214
+
215
+ #ifndef XXH_NO_LONG_LONG
216
+ /*-**********************************************************************
217
+ * 64-bit hash
218
+ ************************************************************************/
219
+ typedef unsigned long long XXH64_hash_t;
220
+
221
+ /*! XXH64() :
222
+ Calculate the 64-bit hash of a sequence of "length" bytes stored at memory address "input".
223
+ "seed" can be used to alter the result predictably.
224
+ This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
225
+ */
226
+ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
227
+
228
+ /*====== Streaming ======*/
229
+ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
230
+ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
231
+ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
232
+ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
233
+
234
+ XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
235
+ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
236
+ XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
237
+
238
+ /*====== Canonical representation ======*/
239
+ typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
240
+ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
241
+ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
242
+ #endif /* XXH_NO_LONG_LONG */
243
+
244
+
245
+
246
+ #ifdef XXH_STATIC_LINKING_ONLY
247
+
248
+ /* ================================================================================================
249
+ This section contains declarations which are not guaranteed to remain stable.
250
+ They may change in future versions, becoming incompatible with a different version of the library.
251
+ These declarations should only be used with static linking.
252
+ Never use them in association with dynamic linking !
253
+ =================================================================================================== */
254
+
255
+ /* These definitions are only present to allow
256
+ * static allocation of XXH state, on stack or in a struct for example.
257
+ * Never **ever** use members directly. */
258
+
259
+ #if !defined (__VMS) \
260
+ && (defined (__cplusplus) \
261
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
262
+ # include <stdint.h>
263
+
264
+ struct XXH32_state_s {
265
+ uint32_t total_len_32;
266
+ uint32_t large_len;
267
+ uint32_t v1;
268
+ uint32_t v2;
269
+ uint32_t v3;
270
+ uint32_t v4;
271
+ uint32_t mem32[4];
272
+ uint32_t memsize;
273
+ uint32_t reserved; /* never read nor write, might be removed in a future version */
274
+ }; /* typedef'd to XXH32_state_t */
275
+
276
+ struct XXH64_state_s {
277
+ uint64_t total_len;
278
+ uint64_t v1;
279
+ uint64_t v2;
280
+ uint64_t v3;
281
+ uint64_t v4;
282
+ uint64_t mem64[4];
283
+ uint32_t memsize;
284
+ uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
285
+ }; /* typedef'd to XXH64_state_t */
286
+
287
+ # else
288
+
289
+ struct XXH32_state_s {
290
+ unsigned total_len_32;
291
+ unsigned large_len;
292
+ unsigned v1;
293
+ unsigned v2;
294
+ unsigned v3;
295
+ unsigned v4;
296
+ unsigned mem32[4];
297
+ unsigned memsize;
298
+ unsigned reserved; /* never read nor write, might be removed in a future version */
299
+ }; /* typedef'd to XXH32_state_t */
300
+
301
+ # ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
302
+ struct XXH64_state_s {
303
+ unsigned long long total_len;
304
+ unsigned long long v1;
305
+ unsigned long long v2;
306
+ unsigned long long v3;
307
+ unsigned long long v4;
308
+ unsigned long long mem64[4];
309
+ unsigned memsize;
310
+ unsigned reserved[2]; /* never read nor write, might be removed in a future version */
311
+ }; /* typedef'd to XXH64_state_t */
312
+ # endif
313
+
314
+ # endif
315
+
316
+
317
+ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
318
+ # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
319
+ #endif
320
+
321
+ #endif /* XXH_STATIC_LINKING_ONLY */
322
+
323
+
324
+ #if defined (__cplusplus)
325
+ }
326
+ #endif
327
+
328
+ #endif /* XXHASH_H_5627135585666179 */
@@ -12,48 +12,29 @@
12
12
 
13
13
  #include "stem_UTF_8_arabic.h"
14
14
  #include "stem_UTF_8_armenian.h"
15
- #include "stem_ISO_8859_1_basque.h"
16
15
  #include "stem_UTF_8_basque.h"
17
- #include "stem_ISO_8859_1_catalan.h"
18
16
  #include "stem_UTF_8_catalan.h"
19
- #include "stem_ISO_8859_1_danish.h"
20
17
  #include "stem_UTF_8_danish.h"
21
- #include "stem_ISO_8859_1_dutch.h"
22
18
  #include "stem_UTF_8_dutch.h"
23
- #include "stem_ISO_8859_1_english.h"
24
19
  #include "stem_UTF_8_english.h"
25
- #include "stem_ISO_8859_1_finnish.h"
26
20
  #include "stem_UTF_8_finnish.h"
27
- #include "stem_ISO_8859_1_french.h"
28
21
  #include "stem_UTF_8_french.h"
29
- #include "stem_ISO_8859_1_german.h"
30
22
  #include "stem_UTF_8_german.h"
31
23
  #include "stem_UTF_8_greek.h"
32
24
  #include "stem_UTF_8_hindi.h"
33
- #include "stem_ISO_8859_2_hungarian.h"
34
25
  #include "stem_UTF_8_hungarian.h"
35
- #include "stem_ISO_8859_1_indonesian.h"
36
26
  #include "stem_UTF_8_indonesian.h"
37
- #include "stem_ISO_8859_1_irish.h"
38
27
  #include "stem_UTF_8_irish.h"
39
- #include "stem_ISO_8859_1_italian.h"
40
28
  #include "stem_UTF_8_italian.h"
41
29
  #include "stem_UTF_8_lithuanian.h"
42
30
  #include "stem_UTF_8_nepali.h"
43
- #include "stem_ISO_8859_1_norwegian.h"
44
31
  #include "stem_UTF_8_norwegian.h"
45
- #include "stem_ISO_8859_1_porter.h"
46
32
  #include "stem_UTF_8_porter.h"
47
- #include "stem_ISO_8859_1_portuguese.h"
48
33
  #include "stem_UTF_8_portuguese.h"
49
- #include "stem_ISO_8859_2_romanian.h"
50
34
  #include "stem_UTF_8_romanian.h"
51
- #include "stem_KOI8_R_russian.h"
52
35
  #include "stem_UTF_8_russian.h"
53
36
  #include "stem_UTF_8_serbian.h"
54
- #include "stem_ISO_8859_1_spanish.h"
55
37
  #include "stem_UTF_8_spanish.h"
56
- #include "stem_ISO_8859_1_swedish.h"
57
38
  #include "stem_UTF_8_swedish.h"
58
39
  #include "stem_UTF_8_tamil.h"
59
40
  #include "stem_UTF_8_turkish.h"
@@ -61,9 +42,6 @@
61
42
 
62
43
  typedef enum {
63
44
  ENC_UNKNOWN=0,
64
- ENC_ISO_8859_1,
65
- ENC_ISO_8859_2,
66
- ENC_KOI8_R,
67
45
  ENC_UTF_8
68
46
  } stemmer_encoding_t;
69
47
 
@@ -72,9 +50,6 @@ struct stemmer_encoding {
72
50
  stemmer_encoding_t enc;
73
51
  };
74
52
  static const struct stemmer_encoding encodings[] = {
75
- {"ISO_8859_1", ENC_ISO_8859_1},
76
- {"ISO_8859_2", ENC_ISO_8859_2},
77
- {"KOI8_R", ENC_KOI8_R},
78
53
  {"UTF_8", ENC_UTF_8},
79
54
  {0,ENC_UNKNOWN}
80
55
  };
@@ -92,94 +67,54 @@ static const struct stemmer_modules modules[] = {
92
67
  {"arabic", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem},
93
68
  {"arm", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
94
69
  {"armenian", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
95
- {"baq", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
96
70
  {"baq", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
97
- {"basque", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
98
71
  {"basque", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
99
- {"ca", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
100
72
  {"ca", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
101
- {"cat", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
102
73
  {"cat", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
103
- {"catalan", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
104
74
  {"catalan", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
105
- {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
106
75
  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
107
- {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
108
76
  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
109
- {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
110
77
  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
111
- {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
112
78
  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
113
- {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
114
79
  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
115
- {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
116
80
  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
117
- {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
118
81
  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
119
82
  {"el", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
120
83
  {"ell", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
121
- {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
122
84
  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
123
- {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
124
85
  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
125
- {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
126
86
  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
127
- {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
128
87
  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
129
- {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
130
88
  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
131
- {"eu", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
132
89
  {"eu", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
133
- {"eus", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
134
90
  {"eus", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
135
- {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
136
91
  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
137
- {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
138
92
  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
139
- {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
140
93
  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
141
- {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
142
94
  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
143
- {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
144
95
  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
145
- {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
146
96
  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
147
- {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
148
97
  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
149
- {"ga", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
150
98
  {"ga", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
151
- {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
152
99
  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
153
- {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
154
100
  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
155
- {"gle", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
156
101
  {"gle", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
157
102
  {"gre", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
158
103
  {"greek", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
159
104
  {"hi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
160
105
  {"hin", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
161
106
  {"hindi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
162
- {"hu", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
163
107
  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
164
- {"hun", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
165
108
  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
166
- {"hungarian", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
167
109
  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
168
110
  {"hy", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
169
111
  {"hye", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
170
- {"id", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
171
112
  {"id", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
172
- {"ind", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
173
113
  {"ind", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
174
- {"indonesian", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
175
114
  {"indonesian", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
176
- {"irish", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
177
115
  {"irish", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
178
- {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
179
116
  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
180
- {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
181
117
  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
182
- {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
183
118
  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
184
119
  {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
185
120
  {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
@@ -187,50 +122,29 @@ static const struct stemmer_modules modules[] = {
187
122
  {"ne", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
188
123
  {"nep", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
189
124
  {"nepali", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
190
- {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
191
125
  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
192
- {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
193
126
  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
194
- {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
195
127
  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
196
- {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
197
128
  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
198
- {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
199
129
  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
200
- {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
201
130
  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
202
- {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem},
203
131
  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
204
- {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
205
132
  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
206
- {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
207
133
  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
208
- {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
209
134
  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
210
- {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
211
135
  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
212
- {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
213
136
  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
214
- {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
215
137
  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
216
- {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
217
138
  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
218
- {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
219
139
  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
220
- {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
221
140
  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
222
141
  {"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
223
- {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
224
142
  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
225
- {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
226
143
  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
227
144
  {"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
228
145
  {"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
229
- {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
230
146
  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
231
- {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
232
147
  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
233
- {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
234
148
  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
235
149
  {"ta", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem},
236
150
  {"tam", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem},
@@ -279,23 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
279
279
  va_end(args);
280
280
  }
281
281
 
282
-
283
- static void Tstack()
284
- {
285
- if (show_stack) {
286
- char *stack = frt_get_stacktrace();
287
- if (stack) {
288
- append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
289
- free(stack);
290
- }
291
- }
292
- }
293
-
294
282
  static void vTmsg_nf(const char *fmt, va_list args)
295
283
  {
296
284
  if (verbose) {
297
285
  vappend_to_msg_buf(fmt, args);
298
- Tstack();
299
286
  }
300
287
  }
301
288
 
@@ -306,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
306
293
  vappend_to_msg_buf(fmt, args);
307
294
  va_end(args);
308
295
  append_to_msg_buf("\n");
309
-
310
- Tstack();
311
296
  }
312
297
  }
313
298
 
@@ -349,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
349
334
  va_start(args, fmt);
350
335
  vappend_to_msg_buf(fmt, args);
351
336
  va_end(args);
352
-
353
- Tstack();
354
337
  }
355
338
  }
356
339
 
@@ -3,10 +3,10 @@
3
3
  #include "testhelper.h"
4
4
  #include <stdio.h>
5
5
 
6
- static FrtFieldInfos *create_fis()
7
- {
8
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
9
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
6
+ extern rb_encoding *utf8_encoding;
7
+
8
+ static FrtFieldInfos *create_fis(void) {
9
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
10
10
  return fis;
11
11
  }
12
12
 
@@ -15,21 +15,21 @@ static FrtIndexWriter *create_iw(FrtStore *store)
15
15
  FrtFieldInfos *fis = create_fis();
16
16
  frt_index_create(store, fis);
17
17
  frt_fis_deref(fis);
18
- return frt_iw_open(store, frt_standard_analyzer_new(true), &frt_default_config);
18
+ return frt_iw_open(NULL, store, frt_standard_analyzer_new(true), &frt_default_config);
19
19
  }
20
20
 
21
- static FrtDocument *prep_doc()
22
- {
21
+ static FrtDocument *prep_doc(void) {
23
22
  FrtDocument *doc = frt_doc_new();
23
+ rb_encoding *enc = utf8_encoding;
24
24
  frt_doc_add_field(
25
25
  doc,
26
26
  frt_df_add_data(
27
27
  frt_df_new(rb_intern("content")),
28
- frt_estrdup("http://_____________________________________________________")
28
+ frt_estrdup("http://_____________________________________________________"),
29
+ enc
29
30
  )
30
31
  )->destroy_data = true;
31
32
  return doc;
32
-
33
33
  }
34
34
 
35
35
  static void test_problem_text(TestCase *tc, void *data)
@@ -40,8 +40,7 @@ static void test_problem_text(TestCase *tc, void *data)
40
40
 
41
41
  frt_iw_add_doc(iw, problem_text);
42
42
  Aiequal(1, frt_iw_doc_count(iw));
43
- Assert(!store->exists(store, "_0.cfs"),
44
- "data shouldn't have been written yet");
43
+ Assert(!store->exists(store, "_0.cfs"), "data shouldn't have been written yet");
45
44
  frt_iw_commit(iw);
46
45
  Assert(store->exists(store, "_0.cfs"), "data should now be written");
47
46
  frt_iw_close(iw);
@@ -50,7 +49,7 @@ static void test_problem_text(TestCase *tc, void *data)
50
49
 
51
50
  TestSuite *ts_1710(TestSuite *suite)
52
51
  {
53
- FrtStore *store = frt_open_ram_store();
52
+ FrtStore *store = frt_open_ram_store(NULL);
54
53
 
55
54
  suite = ADD_SUITE(suite);
56
55