isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,328 @@
1
+ /*
2
+ xxHash - Extremely Fast Hash algorithm
3
+ Header File
4
+ Copyright (C) 2012-2016, Yann Collet.
5
+
6
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
+
8
+ Redistribution and use in source and binary forms, with or without
9
+ modification, are permitted provided that the following conditions are
10
+ met:
11
+
12
+ * Redistributions of source code must retain the above copyright
13
+ notice, this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above
15
+ copyright notice, this list of conditions and the following disclaimer
16
+ in the documentation and/or other materials provided with the
17
+ distribution.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ You can contact the author at :
32
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
33
+ */
34
+
35
+ /* Notice extracted from xxHash homepage :
36
+
37
+ xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38
+ It also successfully passes all tests from the SMHasher suite.
39
+
40
+ Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
+
42
+ Name Speed Q.Score Author
43
+ xxHash 5.4 GB/s 10
44
+ CrapWow 3.2 GB/s 2 Andrew
45
+ MumurHash 3a 2.7 GB/s 10 Austin Appleby
46
+ SpookyHash 2.0 GB/s 10 Bob Jenkins
47
+ SBox 1.4 GB/s 9 Bret Mulvey
48
+ Lookup3 1.2 GB/s 9 Bob Jenkins
49
+ SuperFastHash 1.2 GB/s 1 Paul Hsieh
50
+ CityHash64 1.05 GB/s 10 Pike & Alakuijala
51
+ FNV 0.55 GB/s 5 Fowler, Noll, Vo
52
+ CRC32 0.43 GB/s 9
53
+ MD5-32 0.33 GB/s 10 Ronald L. Rivest
54
+ SHA1-32 0.28 GB/s 10
55
+
56
+ Q.Score is a measure of quality of the hash function.
57
+ It depends on successfully passing SMHasher test set.
58
+ 10 is a perfect score.
59
+
60
+ A 64-bit version, named XXH64, is available since r35.
61
+ It offers much better speed, but for 64-bit applications only.
62
+ Name Speed on 64 bits Speed on 32 bits
63
+ XXH64 13.8 GB/s 1.9 GB/s
64
+ XXH32 6.8 GB/s 6.0 GB/s
65
+ */
66
+
67
+ #ifndef XXHASH_H_5627135585666179
68
+ #define XXHASH_H_5627135585666179 1
69
+
70
+ #if defined (__cplusplus)
71
+ extern "C" {
72
+ #endif
73
+
74
+
75
+ /* ****************************
76
+ * Definitions
77
+ ******************************/
78
+ #include <stddef.h> /* size_t */
79
+ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
80
+
81
+
82
+ /* ****************************
83
+ * API modifier
84
+ ******************************/
85
+ /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
86
+ * This is useful to include xxhash functions in `static` mode
87
+ * in order to inline them, and remove their symbol from the public list.
88
+ * Inlining can offer dramatic performance improvement on small keys.
89
+ * Methodology :
90
+ * #define XXH_INLINE_ALL
91
+ * #include "lz4xxhash.h"
92
+ * `xxhash.c` is automatically included.
93
+ * It's not useful to compile and link it as a separate module.
94
+ */
95
+ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
96
+ # ifndef XXH_STATIC_LINKING_ONLY
97
+ # define XXH_STATIC_LINKING_ONLY
98
+ # endif
99
+ # if defined(__GNUC__)
100
+ # define XXH_PUBLIC_API static __inline __attribute__((unused))
101
+ # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
102
+ # define XXH_PUBLIC_API static inline
103
+ # elif defined(_MSC_VER)
104
+ # define XXH_PUBLIC_API static __inline
105
+ # else
106
+ /* this version may generate warnings for unused static functions */
107
+ # define XXH_PUBLIC_API static
108
+ # endif
109
+ #else
110
+ # define XXH_PUBLIC_API /* do nothing */
111
+ #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
112
+
113
+ /*! XXH_NAMESPACE, aka Namespace Emulation :
114
+ *
115
+ * If you want to include _and expose_ xxHash functions from within your own library,
116
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
117
+ *
118
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
119
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
120
+ *
121
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
122
+ * regular symbol name will be automatically translated by this header.
123
+ */
124
+ #ifdef XXH_NAMESPACE
125
+ # define XXH_CAT(A,B) A##B
126
+ # define XXH_NAME2(A,B) XXH_CAT(A,B)
127
+ # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
128
+ # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
129
+ # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
130
+ # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
131
+ # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
132
+ # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
133
+ # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
134
+ # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
135
+ # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
136
+ # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
137
+ # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
138
+ # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
139
+ # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
140
+ # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
141
+ # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
142
+ # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
143
+ # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
144
+ # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
145
+ # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
146
+ #endif
147
+
148
+
149
+ /* *************************************
150
+ * Version
151
+ ***************************************/
152
+ #define XXH_VERSION_MAJOR 0
153
+ #define XXH_VERSION_MINOR 6
154
+ #define XXH_VERSION_RELEASE 5
155
+ #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
156
+ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
157
+
158
+
159
+ /*-**********************************************************************
160
+ * 32-bit hash
161
+ ************************************************************************/
162
+ typedef unsigned int XXH32_hash_t;
163
+
164
+ /*! XXH32() :
165
+ Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
166
+ The memory between input & input+length must be valid (allocated and read-accessible).
167
+ "seed" can be used to alter the result predictably.
168
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
169
+ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
170
+
171
+ /*====== Streaming ======*/
172
+ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
173
+ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
174
+ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
175
+ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
176
+
177
+ XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
178
+ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
179
+ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
180
+
181
+ /*
182
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
183
+ * Note that, for small input, they are slower than single-call functions, due to state management.
184
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
185
+ *
186
+ * XXH state must first be allocated, using XXH*_createState() .
187
+ *
188
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
189
+ *
190
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
191
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
192
+ *
193
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
194
+ * This function returns the nn-bits hash as an int or long long.
195
+ *
196
+ * It's still possible to continue inserting input into the hash state after a digest,
197
+ * and generate some new hashes later on, by calling again XXH*_digest().
198
+ *
199
+ * When done, free XXH state space if it was allocated dynamically.
200
+ */
201
+
202
+ /*====== Canonical representation ======*/
203
+
204
+ typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
205
+ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
206
+ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
207
+
208
+ /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
209
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
210
+ * These functions allow transformation of hash result into and from its canonical format.
211
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
212
+ */
213
+
214
+
215
+ #ifndef XXH_NO_LONG_LONG
216
+ /*-**********************************************************************
217
+ * 64-bit hash
218
+ ************************************************************************/
219
+ typedef unsigned long long XXH64_hash_t;
220
+
221
+ /*! XXH64() :
222
+ Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
223
+ "seed" can be used to alter the result predictably.
224
+ This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
225
+ */
226
+ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
227
+
228
+ /*====== Streaming ======*/
229
+ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
230
+ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
231
+ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
232
+ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
233
+
234
+ XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
235
+ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
236
+ XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
237
+
238
+ /*====== Canonical representation ======*/
239
+ typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
240
+ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
241
+ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
242
+ #endif /* XXH_NO_LONG_LONG */
243
+
244
+
245
+
246
+ #ifdef XXH_STATIC_LINKING_ONLY
247
+
248
+ /* ================================================================================================
249
+ This section contains declarations which are not guaranteed to remain stable.
250
+ They may change in future versions, becoming incompatible with a different version of the library.
251
+ These declarations should only be used with static linking.
252
+ Never use them in association with dynamic linking !
253
+ =================================================================================================== */
254
+
255
+ /* These definitions are only present to allow
256
+ * static allocation of XXH state, on stack or in a struct for example.
257
+ * Never **ever** use members directly. */
258
+
259
+ #if !defined (__VMS) \
260
+ && (defined (__cplusplus) \
261
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
262
+ # include <stdint.h>
263
+
264
+ struct XXH32_state_s {
265
+ uint32_t total_len_32;
266
+ uint32_t large_len;
267
+ uint32_t v1;
268
+ uint32_t v2;
269
+ uint32_t v3;
270
+ uint32_t v4;
271
+ uint32_t mem32[4];
272
+ uint32_t memsize;
273
+ uint32_t reserved; /* never read nor write, might be removed in a future version */
274
+ }; /* typedef'd to XXH32_state_t */
275
+
276
+ struct XXH64_state_s {
277
+ uint64_t total_len;
278
+ uint64_t v1;
279
+ uint64_t v2;
280
+ uint64_t v3;
281
+ uint64_t v4;
282
+ uint64_t mem64[4];
283
+ uint32_t memsize;
284
+ uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
285
+ }; /* typedef'd to XXH64_state_t */
286
+
287
+ # else
288
+
289
+ struct XXH32_state_s {
290
+ unsigned total_len_32;
291
+ unsigned large_len;
292
+ unsigned v1;
293
+ unsigned v2;
294
+ unsigned v3;
295
+ unsigned v4;
296
+ unsigned mem32[4];
297
+ unsigned memsize;
298
+ unsigned reserved; /* never read nor write, might be removed in a future version */
299
+ }; /* typedef'd to XXH32_state_t */
300
+
301
+ # ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
302
+ struct XXH64_state_s {
303
+ unsigned long long total_len;
304
+ unsigned long long v1;
305
+ unsigned long long v2;
306
+ unsigned long long v3;
307
+ unsigned long long v4;
308
+ unsigned long long mem64[4];
309
+ unsigned memsize;
310
+ unsigned reserved[2]; /* never read nor write, might be removed in a future version */
311
+ }; /* typedef'd to XXH64_state_t */
312
+ # endif
313
+
314
+ # endif
315
+
316
+
317
+ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
318
+ # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
319
+ #endif
320
+
321
+ #endif /* XXH_STATIC_LINKING_ONLY */
322
+
323
+
324
+ #if defined (__cplusplus)
325
+ }
326
+ #endif
327
+
328
+ #endif /* XXHASH_H_5627135585666179 */
@@ -12,48 +12,29 @@
12
12
 
13
13
  #include "stem_UTF_8_arabic.h"
14
14
  #include "stem_UTF_8_armenian.h"
15
- #include "stem_ISO_8859_1_basque.h"
16
15
  #include "stem_UTF_8_basque.h"
17
- #include "stem_ISO_8859_1_catalan.h"
18
16
  #include "stem_UTF_8_catalan.h"
19
- #include "stem_ISO_8859_1_danish.h"
20
17
  #include "stem_UTF_8_danish.h"
21
- #include "stem_ISO_8859_1_dutch.h"
22
18
  #include "stem_UTF_8_dutch.h"
23
- #include "stem_ISO_8859_1_english.h"
24
19
  #include "stem_UTF_8_english.h"
25
- #include "stem_ISO_8859_1_finnish.h"
26
20
  #include "stem_UTF_8_finnish.h"
27
- #include "stem_ISO_8859_1_french.h"
28
21
  #include "stem_UTF_8_french.h"
29
- #include "stem_ISO_8859_1_german.h"
30
22
  #include "stem_UTF_8_german.h"
31
23
  #include "stem_UTF_8_greek.h"
32
24
  #include "stem_UTF_8_hindi.h"
33
- #include "stem_ISO_8859_2_hungarian.h"
34
25
  #include "stem_UTF_8_hungarian.h"
35
- #include "stem_ISO_8859_1_indonesian.h"
36
26
  #include "stem_UTF_8_indonesian.h"
37
- #include "stem_ISO_8859_1_irish.h"
38
27
  #include "stem_UTF_8_irish.h"
39
- #include "stem_ISO_8859_1_italian.h"
40
28
  #include "stem_UTF_8_italian.h"
41
29
  #include "stem_UTF_8_lithuanian.h"
42
30
  #include "stem_UTF_8_nepali.h"
43
- #include "stem_ISO_8859_1_norwegian.h"
44
31
  #include "stem_UTF_8_norwegian.h"
45
- #include "stem_ISO_8859_1_porter.h"
46
32
  #include "stem_UTF_8_porter.h"
47
- #include "stem_ISO_8859_1_portuguese.h"
48
33
  #include "stem_UTF_8_portuguese.h"
49
- #include "stem_ISO_8859_2_romanian.h"
50
34
  #include "stem_UTF_8_romanian.h"
51
- #include "stem_KOI8_R_russian.h"
52
35
  #include "stem_UTF_8_russian.h"
53
36
  #include "stem_UTF_8_serbian.h"
54
- #include "stem_ISO_8859_1_spanish.h"
55
37
  #include "stem_UTF_8_spanish.h"
56
- #include "stem_ISO_8859_1_swedish.h"
57
38
  #include "stem_UTF_8_swedish.h"
58
39
  #include "stem_UTF_8_tamil.h"
59
40
  #include "stem_UTF_8_turkish.h"
@@ -61,9 +42,6 @@
61
42
 
62
43
  typedef enum {
63
44
  ENC_UNKNOWN=0,
64
- ENC_ISO_8859_1,
65
- ENC_ISO_8859_2,
66
- ENC_KOI8_R,
67
45
  ENC_UTF_8
68
46
  } stemmer_encoding_t;
69
47
 
@@ -72,9 +50,6 @@ struct stemmer_encoding {
72
50
  stemmer_encoding_t enc;
73
51
  };
74
52
  static const struct stemmer_encoding encodings[] = {
75
- {"ISO_8859_1", ENC_ISO_8859_1},
76
- {"ISO_8859_2", ENC_ISO_8859_2},
77
- {"KOI8_R", ENC_KOI8_R},
78
53
  {"UTF_8", ENC_UTF_8},
79
54
  {0,ENC_UNKNOWN}
80
55
  };
@@ -92,94 +67,54 @@ static const struct stemmer_modules modules[] = {
92
67
  {"arabic", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem},
93
68
  {"arm", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
94
69
  {"armenian", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
95
- {"baq", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
96
70
  {"baq", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
97
- {"basque", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
98
71
  {"basque", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
99
- {"ca", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
100
72
  {"ca", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
101
- {"cat", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
102
73
  {"cat", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
103
- {"catalan", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
104
74
  {"catalan", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
105
- {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
106
75
  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
107
- {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
108
76
  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
109
- {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
110
77
  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
111
- {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
112
78
  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
113
- {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
114
79
  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
115
- {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
116
80
  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
117
- {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
118
81
  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
119
82
  {"el", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
120
83
  {"ell", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
121
- {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
122
84
  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
123
- {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
124
85
  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
125
- {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
126
86
  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
127
- {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
128
87
  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
129
- {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
130
88
  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
131
- {"eu", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
132
89
  {"eu", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
133
- {"eus", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
134
90
  {"eus", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
135
- {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
136
91
  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
137
- {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
138
92
  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
139
- {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
140
93
  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
141
- {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
142
94
  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
143
- {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
144
95
  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
145
- {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
146
96
  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
147
- {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
148
97
  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
149
- {"ga", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
150
98
  {"ga", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
151
- {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
152
99
  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
153
- {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
154
100
  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
155
- {"gle", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
156
101
  {"gle", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
157
102
  {"gre", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
158
103
  {"greek", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
159
104
  {"hi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
160
105
  {"hin", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
161
106
  {"hindi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
162
- {"hu", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
163
107
  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
164
- {"hun", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
165
108
  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
166
- {"hungarian", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
167
109
  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
168
110
  {"hy", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
169
111
  {"hye", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem},
170
- {"id", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
171
112
  {"id", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
172
- {"ind", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
173
113
  {"ind", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
174
- {"indonesian", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
175
114
  {"indonesian", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
176
- {"irish", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
177
115
  {"irish", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
178
- {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
179
116
  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
180
- {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
181
117
  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
182
- {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
183
118
  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
184
119
  {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
185
120
  {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
@@ -187,50 +122,29 @@ static const struct stemmer_modules modules[] = {
187
122
  {"ne", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
188
123
  {"nep", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
189
124
  {"nepali", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
190
- {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
191
125
  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
192
- {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
193
126
  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
194
- {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
195
127
  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
196
- {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
197
128
  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
198
- {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
199
129
  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
200
- {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
201
130
  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
202
- {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem},
203
131
  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
204
- {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
205
132
  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
206
- {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
207
133
  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
208
- {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
209
134
  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
210
- {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
211
135
  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
212
- {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
213
136
  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
214
- {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
215
137
  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
216
- {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
217
138
  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
218
- {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
219
139
  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
220
- {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
221
140
  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
222
141
  {"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
223
- {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
224
142
  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
225
- {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
226
143
  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
227
144
  {"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
228
145
  {"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
229
- {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
230
146
  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
231
- {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
232
147
  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
233
- {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
234
148
  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
235
149
  {"ta", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem},
236
150
  {"tam", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem},
@@ -280,8 +280,7 @@ static void append_to_msg_buf(const char *fmt, ...)
280
280
  }
281
281
 
282
282
 
283
- static void Tstack()
284
- {
283
+ static void Tstack(void) {
285
284
  if (show_stack) {
286
285
  char *stack = frt_get_stacktrace();
287
286
  if (stack) {
@@ -3,10 +3,10 @@
3
3
  #include "testhelper.h"
4
4
  #include <stdio.h>
5
5
 
6
- static FrtFieldInfos *create_fis()
7
- {
8
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
9
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
6
+ extern rb_encoding *utf8_encoding;
7
+
8
+ static FrtFieldInfos *create_fis(void) {
9
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
10
10
  return fis;
11
11
  }
12
12
 
@@ -15,21 +15,21 @@ static FrtIndexWriter *create_iw(FrtStore *store)
15
15
  FrtFieldInfos *fis = create_fis();
16
16
  frt_index_create(store, fis);
17
17
  frt_fis_deref(fis);
18
- return frt_iw_open(store, frt_standard_analyzer_new(true), &frt_default_config);
18
+ return frt_iw_open(NULL, store, frt_standard_analyzer_new(true), &frt_default_config);
19
19
  }
20
20
 
21
- static FrtDocument *prep_doc()
22
- {
21
+ static FrtDocument *prep_doc(void) {
23
22
  FrtDocument *doc = frt_doc_new();
23
+ rb_encoding *enc = utf8_encoding;
24
24
  frt_doc_add_field(
25
25
  doc,
26
26
  frt_df_add_data(
27
27
  frt_df_new(rb_intern("content")),
28
- frt_estrdup("http://_____________________________________________________")
28
+ frt_estrdup("http://_____________________________________________________"),
29
+ enc
29
30
  )
30
31
  )->destroy_data = true;
31
32
  return doc;
32
-
33
33
  }
34
34
 
35
35
  static void test_problem_text(TestCase *tc, void *data)
@@ -40,8 +40,7 @@ static void test_problem_text(TestCase *tc, void *data)
40
40
 
41
41
  frt_iw_add_doc(iw, problem_text);
42
42
  Aiequal(1, frt_iw_doc_count(iw));
43
- Assert(!store->exists(store, "_0.cfs"),
44
- "data shouldn't have been written yet");
43
+ Assert(!store->exists(store, "_0.cfs"), "data shouldn't have been written yet");
45
44
  frt_iw_commit(iw);
46
45
  Assert(store->exists(store, "_0.cfs"), "data should now be written");
47
46
  frt_iw_close(iw);
@@ -50,7 +49,7 @@ static void test_problem_text(TestCase *tc, void *data)
50
49
 
51
50
  TestSuite *ts_1710(TestSuite *suite)
52
51
  {
53
- FrtStore *store = frt_open_ram_store();
52
+ FrtStore *store = frt_open_ram_store(NULL);
54
53
 
55
54
  suite = ADD_SUITE(suite);
56
55