isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,205 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Huffman coding low-level stuff ---*/
4
+ /*--- huffman.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
12
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+ /*---------------------------------------------------
+    Helper macros for the packed node values used below. Each value
+    keeps a weight in its upper 24 bits and a depth in its low 8 bits
+    (leaf weights are created as a count shifted left by 8).
+ ---------------------------------------------------*/
25
+ #define WEIGHTOF(zz0) ((zz0) & 0xffffff00) /* keep the upper 24 bits (weight part) */
26
+ #define DEPTHOF(zz1) ((zz1) & 0x000000ff) /* keep the low 8 bits (depth part) */
27
+ #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) /* the larger argument is evaluated twice */
28
+ /* ADDWEIGHTS(a,b): combine two packed values into the value for their
+    parent: the weight parts are added and the depth part becomes one
+    more than the larger of the two depths.
+    NOTE(review): the expansion is not wrapped in outer parentheses, so
+    it is only safe where it forms a complete expression on its own, as
+    in the single assignment that uses it in this file. */
29
+ #define ADDWEIGHTS(zw1,zw2) \
30
+ (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
31
+ (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32
+ /* UPHEAP(z): move the entry at heap[z] towards the front of the heap
+    while its weight is smaller than the weight of the entry at half
+    its index (zz >> 1). Expands to a braced block that uses the
+    identifiers `heap` and `weight` from the expansion site.
+    The loop has no explicit index check; it relies on the entry at
+    index 0 never comparing greater than the moved entry (the caller
+    stores heap[0] = 0 and weight[0] = 0 before using this macro). */
33
+ #define UPHEAP(z) \
34
+ { \
35
+ Int32 zz, tmp; \
36
+ zz = z; tmp = heap[zz]; \
37
+ while (weight[tmp] < weight[heap[zz >> 1]]) { \
38
+ heap[zz] = heap[zz >> 1]; \
39
+ zz >>= 1; \
40
+ } \
41
+ heap[zz] = tmp; \
42
+ }
43
+ /* DOWNHEAP(z): move the entry at heap[z] away from the front of the
+    heap. At each step the candidate is the entry at index zz << 1, or
+    the one after it when that exists (yy < nHeap) and is lighter; the
+    walk stops when there is no candidate (yy > nHeap) or when the moved
+    entry is lighter than the candidate. Expands to a braced block that
+    uses the identifiers `heap`, `weight` and `nHeap` from the expansion
+    site. */
44
+ #define DOWNHEAP(z) \
45
+ { \
46
+ Int32 zz, yy, tmp; \
47
+ zz = z; tmp = heap[zz]; \
48
+ while (True) { \
49
+ yy = zz << 1; \
50
+ if (yy > nHeap) break; \
51
+ if (yy < nHeap && \
52
+ weight[heap[yy+1]] < weight[heap[yy]]) \
53
+ yy++; \
54
+ if (weight[tmp] < weight[heap[yy]]) break; \
55
+ heap[zz] = heap[yy]; \
56
+ zz = yy; \
57
+ } \
58
+ heap[zz] = tmp; \
59
+ }
60
+
61
+
62
+ /*---------------------------------------------------
+    BZ2_hbMakeCodeLengths: derive a code length for each of alphaSize
+    symbols from their frequencies, by repeatedly merging the two
+    lightest nodes taken from a min-heap and then counting, for every
+    symbol, the number of parent links up to the root.
+
+    len       - output; len[0 .. alphaSize-1] receive the lengths.
+    freq      - input; freq[0 .. alphaSize-1] are read. A zero count
+                is treated as 1.
+    alphaSize - number of symbols. NOTE(review): presumably must not
+                exceed BZ_MAX_ALPHA_SIZE, which sizes the local
+                arrays; the AssertH checks below run only after the
+                arrays have been written - confirm against callers.
+    maxLen    - largest acceptable length. If any symbol ends up with
+                a greater length, every leaf weight is reduced to
+                1 + (weight / 2) and the tree is rebuilt, until all
+                lengths fit.
+ ---------------------------------------------------*/
63
+ void BZ2_hbMakeCodeLengths ( UChar *len,
64
+ Int32 *freq,
65
+ Int32 alphaSize,
66
+ Int32 maxLen )
67
+ {
68
+ /*--
69
+ Nodes and heap entries run from 1. Entry 0
70
+ for both the heap and nodes is a sentinel.
71
+ --*/
72
+ Int32 nNodes, nHeap, n1, n2, i, j, k;
73
+ Bool tooLong;
74
+
75
+ Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; /* node indices ordered as a min-heap on weight[] */
76
+ Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; /* packed weight/depth value per node */
77
+ Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; /* parent node index; negative when none */
78
+
79
+ for (i = 0; i < alphaSize; i++)
80
+ weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; /* symbol i is node i+1; low 8 bits left as 0 */
81
+
82
+ while (True) { /* one iteration per tree-building attempt */
83
+
84
+ nNodes = alphaSize;
85
+ nHeap = 0;
86
+
87
+ heap[0] = 0; /* index-0 sentinel entries (see comment at top) */
88
+ weight[0] = 0;
89
+ parent[0] = -2;
90
+
91
+ for (i = 1; i <= alphaSize; i++) { /* insert every leaf node into the heap */
92
+ parent[i] = -1;
93
+ nHeap++;
94
+ heap[nHeap] = i;
95
+ UPHEAP(nHeap);
96
+ }
97
+
98
+ AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); /* heap[] bound; evaluated after the fill loop above */
99
+
100
+ while (nHeap > 1) { /* merge until a single node remains */
101
+ n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); /* remove lightest node */
102
+ n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); /* remove next lightest node */
103
+ nNodes++;
104
+ parent[n1] = parent[n2] = nNodes; /* new node becomes parent of both */
105
+ weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106
+ parent[nNodes] = -1;
107
+ nHeap++;
108
+ heap[nHeap] = nNodes; /* put the merged node back into the heap */
109
+ UPHEAP(nHeap);
110
+ }
111
+
112
+ AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); /* weight[]/parent[] bound; evaluated after the merges */
113
+
114
+ tooLong = False;
115
+ for (i = 1; i <= alphaSize; i++) {
116
+ j = 0;
117
+ k = i;
118
+ while (parent[k] >= 0) { k = parent[k]; j++; } /* j = number of parent links from leaf i */
119
+ len[i-1] = j;
120
+ if (j > maxLen) tooLong = True;
121
+ }
122
+
123
+ if (! tooLong) break; /* every length fits: done */
124
+
125
+ /* 17 Oct 04: keep-going condition for the following loop used
126
+ to be 'i < alphaSize', which missed the last element,
127
+ theoretically leading to the possibility of the compressor
128
+ looping. However, this count-scaling step is only needed if
129
+ one of the generated Huffman code words is longer than
130
+ maxLen, which up to and including version 1.0.2 was 20 bits,
131
+ which is extremely unlikely. In version 1.0.3 maxLen was
132
+ changed to 17 bits, which has minimal effect on compression
133
+ ratio, but does mean this scaling step is used from time to
134
+ time, enough to verify that it works.
135
+
136
+ This means that bzip2-1.0.3 and later will only produce
137
+ Huffman codes with a maximum length of 17 bits. However, in
138
+ order to preserve backwards compatibility with bitstreams
139
+ produced by versions pre-1.0.3, the decompressor must still
140
+ handle lengths of up to 20. */
141
+
142
+ for (i = 1; i <= alphaSize; i++) { /* rescale leaf weights, then retry */
143
+ j = weight[i] >> 8; /* drop the low 8 bits */
144
+ j = 1 + (j / 2); /* halve, never reaching 0 */
145
+ weight[i] = j << 8; /* low 8 bits back to 0 */
146
+ }
147
+ }
148
+ }
149
+
150
+
151
+ /*---------------------------------------------------
+    BZ2_hbAssignCodes: assign a code value to every symbol whose
+    length lies in minLen .. maxLen.
+
+    Lengths are visited in increasing order. Within one length the
+    symbols are visited in index order and receive consecutive values
+    of a running counter; the counter is shifted left by one bit each
+    time the length increases.
+
+    code      - output; code[i] is written only for symbols whose
+                length is within minLen .. maxLen.
+    length    - input; length[0 .. alphaSize-1].
+    alphaSize - number of symbols.
+ ---------------------------------------------------*/
152
+ void BZ2_hbAssignCodes ( Int32 *code,
153
+ UChar *length,
154
+ Int32 minLen,
155
+ Int32 maxLen,
156
+ Int32 alphaSize )
157
+ {
158
+ Int32 n, vec, i;
159
+
160
+ vec = 0; /* running code counter */
161
+ for (n = minLen; n <= maxLen; n++) {
162
+ for (i = 0; i < alphaSize; i++)
163
+ if (length[i] == n) { code[i] = vec; vec++; };
164
+ vec <<= 1; /* next length has one more bit */
165
+ }
166
+ }
167
+
168
+
169
+ /*---------------------------------------------------
+    BZ2_hbCreateDecodeTables: fill the limit, base and perm tables
+    from a list of per-symbol code lengths.
+
+    limit     - output; entries 0 .. BZ_MAX_CODE_LEN-1 are zeroed,
+                then limit[i] for i in minLen .. maxLen is set to one
+                less than a running counter that adds the number of
+                symbols of length i and is doubled after each length.
+    base      - output; entries 0 .. BZ_MAX_CODE_LEN-1 are
+                overwritten (see the steps in the body).
+    perm      - output; symbol indices whose length is within
+                minLen .. maxLen, ordered by length and then by index.
+    length    - input; length[0 .. alphaSize-1].
+    alphaSize - number of symbols.
+
+    NOTE(review): base[] is indexed with length[i]+1 and with i+1 for
+    i up to maxLen, so this assumes those indices are valid for the
+    caller's array - confirm against callers.
+ ---------------------------------------------------*/
170
+ void BZ2_hbCreateDecodeTables ( Int32 *limit,
171
+ Int32 *base,
172
+ Int32 *perm,
173
+ UChar *length,
174
+ Int32 minLen,
175
+ Int32 maxLen,
176
+ Int32 alphaSize )
177
+ {
178
+ Int32 pp, i, j, vec;
179
+
180
+ pp = 0; /* next free slot in perm[] */
181
+ for (i = minLen; i <= maxLen; i++)
182
+ for (j = 0; j < alphaSize; j++)
183
+ if (length[j] == i) { perm[pp] = j; pp++; };
184
+
185
+ for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186
+ for (i = 0; i < alphaSize; i++) base[length[i]+1]++; /* base[n+1] = count of symbols with length n */
187
+
188
+ for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; /* running total: base[n] = count of symbols shorter than n */
189
+
190
+ for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191
+ vec = 0;
192
+
193
+ for (i = minLen; i <= maxLen; i++) {
194
+ vec += (base[i+1] - base[i]); /* add the number of symbols of length i */
195
+ limit[i] = vec-1;
196
+ vec <<= 1;
197
+ }
198
+ for (i = minLen + 1; i <= maxLen; i++)
199
+ base[i] = ((limit[i-1] + 1) << 1) - base[i]; /* twice (limit of previous length + 1), minus the running total */
200
+ }
201
+
202
+
203
+ /*-------------------------------------------------------------*/
204
+ /*--- end huffman.c ---*/
205
+ /*-------------------------------------------------------------*/