libdeflate 0.1.1 → 0.2.0

Files changed (79)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/test.yml +34 -0
  3. data/README.md +1 -6
  4. data/ext/libdeflate/extconf.rb +18 -7
  5. data/ext/libdeflate/libdeflate_ext.c +17 -17
  6. data/lib/libdeflate/version.rb +1 -1
  7. data/libdeflate.gemspec +2 -1
  8. metadata +13 -84
  9. data/.gitmodules +0 -3
  10. data/.travis.yml +0 -5
  11. data/ext/libdeflate/libdeflate/.gitignore +0 -19
  12. data/ext/libdeflate/libdeflate/COPYING +0 -21
  13. data/ext/libdeflate/libdeflate/Makefile +0 -231
  14. data/ext/libdeflate/libdeflate/Makefile.msc +0 -64
  15. data/ext/libdeflate/libdeflate/NEWS +0 -57
  16. data/ext/libdeflate/libdeflate/README.md +0 -170
  17. data/ext/libdeflate/libdeflate/common/common_defs.h +0 -351
  18. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +0 -134
  19. data/ext/libdeflate/libdeflate/common/compiler_msc.h +0 -95
  20. data/ext/libdeflate/libdeflate/lib/adler32.c +0 -213
  21. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +0 -281
  22. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +0 -57
  23. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +0 -13
  24. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +0 -357
  25. data/ext/libdeflate/libdeflate/lib/crc32.c +0 -368
  26. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +0 -286
  27. data/ext/libdeflate/libdeflate/lib/crc32_table.h +0 -526
  28. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +0 -404
  29. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +0 -2817
  30. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +0 -14
  31. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +0 -66
  32. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +0 -889
  33. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +0 -95
  34. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +0 -45
  35. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +0 -130
  36. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +0 -405
  37. data/ext/libdeflate/libdeflate/lib/lib_common.h +0 -35
  38. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +0 -53
  39. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +0 -205
  40. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +0 -61
  41. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +0 -53
  42. data/ext/libdeflate/libdeflate/lib/unaligned.h +0 -202
  43. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +0 -169
  44. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +0 -48
  45. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +0 -87
  46. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +0 -21
  47. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +0 -91
  48. data/ext/libdeflate/libdeflate/libdeflate.h +0 -274
  49. data/ext/libdeflate/libdeflate/programs/benchmark.c +0 -558
  50. data/ext/libdeflate/libdeflate/programs/checksum.c +0 -197
  51. data/ext/libdeflate/libdeflate/programs/detect.sh +0 -62
  52. data/ext/libdeflate/libdeflate/programs/gzip.c +0 -603
  53. data/ext/libdeflate/libdeflate/programs/prog_util.c +0 -530
  54. data/ext/libdeflate/libdeflate/programs/prog_util.h +0 -162
  55. data/ext/libdeflate/libdeflate/programs/test_checksums.c +0 -135
  56. data/ext/libdeflate/libdeflate/programs/tgetopt.c +0 -118
  57. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +0 -12
  58. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +0 -40
  59. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  60. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +0 -28
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +0 -3
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +0 -28
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +0 -14
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +0 -28
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +0 -3
  67. data/ext/libdeflate/libdeflate/tools/android_build.sh +0 -104
  68. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +0 -76
  69. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +0 -30
  70. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +0 -108
  71. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +0 -100
  72. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +0 -412
  73. data/ext/libdeflate/libdeflate/tools/make-windows-releases +0 -21
  74. data/ext/libdeflate/libdeflate/tools/mips_build.sh +0 -9
  75. data/ext/libdeflate/libdeflate/tools/msc_test.bat +0 -3
  76. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +0 -23
  77. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +0 -37
  78. data/ext/libdeflate/libdeflate/tools/run_tests.sh +0 -305
  79. data/ext/libdeflate/libdeflate/tools/windows_build.sh +0 -10
data/ext/libdeflate/libdeflate/lib/adler32_impl.h
@@ -1,281 +0,0 @@
- /*
-  * adler32_impl.h
-  *
-  * Originally public domain; changes after 2016-09-07 are copyrighted.
-  *
-  * Copyright 2016 Eric Biggers
-  *
-  * Permission is hereby granted, free of charge, to any person
-  * obtaining a copy of this software and associated documentation
-  * files (the "Software"), to deal in the Software without
-  * restriction, including without limitation the rights to use,
-  * copy, modify, merge, publish, distribute, sublicense, and/or sell
-  * copies of the Software, and to permit persons to whom the
-  * Software is furnished to do so, subject to the following
-  * conditions:
-  *
-  * The above copyright notice and this permission notice shall be
-  * included in all copies or substantial portions of the Software.
-  *
-  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-  * OTHER DEALINGS IN THE SOFTWARE.
-  */
-
- /*
-  * This file contains a template for vectorized Adler-32 implementations.
-  *
-  * The inner loop between reductions modulo 65521 of an unvectorized Adler-32
-  * implementation looks something like this:
-  *
-  *         do {
-  *                 s1 += *p;
-  *                 s2 += s1;
-  *         } while (++p != chunk_end);
-  *
-  * For vectorized calculation of s1, we only need to sum the input bytes. They
-  * can be accumulated into multiple counters which are eventually summed
-  * together.
-  *
-  * For vectorized calculation of s2, the basic idea is that for each iteration
-  * that processes N bytes, we can perform the following vectorizable
-  * calculation:
-  *
-  *         s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N
-  *
-  * Or, equivalently, we can sum the byte_1...byte_N for each iteration into N
-  * separate counters, then do the multiplications by N...1 just once at the end
-  * rather than once per iteration.
-  *
-  * Also, we must account for how previous bytes will affect s2 by doing the
-  * following at the beginning of each iteration:
-  *
-  *         s2 += s1 * N
-  *
-  * Furthermore, like s1, "s2" can actually be multiple counters which are
-  * eventually summed together.
-  */
-
- static u32 ATTRIBUTES
- FUNCNAME(u32 adler, const void *buffer, size_t size)
- {
-         u32 s1 = adler & 0xFFFF;
-         u32 s2 = adler >> 16;
-         const u8 *p = buffer;
-         const u8 * const end = p + size;
-         const u8 *vend;
-
-         /* Process a byte at a time until the required alignment is reached. */
-         if (p != end && (uintptr_t)p % ALIGNMENT_REQUIRED) {
-                 do {
-                         s1 += *p++;
-                         s2 += s1;
-                 } while (p != end && (uintptr_t)p % ALIGNMENT_REQUIRED);
-                 s1 %= DIVISOR;
-                 s2 %= DIVISOR;
-         }
-
-         /*
-          * Process "chunks" of bytes using vector instructions. Chunk sizes are
-          * limited to MAX_BYTES_PER_CHUNK, which guarantees that s1 and s2 never
-          * overflow before being reduced modulo DIVISOR. For vector processing,
-          * chunk sizes are also made evenly divisible by BYTES_PER_ITERATION.
-          */
-         STATIC_ASSERT(BYTES_PER_ITERATION % ALIGNMENT_REQUIRED == 0);
-         vend = end - ((size_t)(end - p) % BYTES_PER_ITERATION);
-         while (p != vend) {
-                 size_t chunk_size;
-                 const u8 *chunk_end;
-
-                 chunk_size = MIN((size_t)(vend - p), MAX_BYTES_PER_CHUNK);
- #if TARGET == TARGET_SSE2
-                 /* SSE2: the 16-bit precision byte counters must not undergo
-                  * *signed* overflow, otherwise the signed multiplication at the
-                  * end will not behave as desired. */
-                 chunk_size = MIN(chunk_size, BYTES_PER_ITERATION * (0x7FFF / 0xFF));
- #elif TARGET == TARGET_NEON
-                 /* NEON: the 16-bit precision counters must not undergo
-                  * *unsigned* overflow. */
-                 chunk_size = MIN(chunk_size, BYTES_PER_ITERATION * (0xFFFF / 0xFF));
- #endif
-                 chunk_size -= chunk_size % BYTES_PER_ITERATION;
-
-                 chunk_end = p + chunk_size;
-
-                 s2 += s1 * chunk_size;
-                 {
- #if TARGET == TARGET_AVX2
-                         /* AVX2 implementation */
-                         const __m256i zeroes = _mm256_setzero_si256();
-                         const __v32qi multipliers = (__v32qi) { 32, 31, 30, 29, 28, 27, 26, 25,
-                                                                 24, 23, 22, 21, 20, 19, 18, 17,
-                                                                 16, 15, 14, 13, 12, 11, 10, 9,
-                                                                 8, 7, 6, 5, 4, 3, 2, 1 };
-                         const __v16hi ones = (__v16hi)_mm256_set1_epi16(1);
-                         __v8si v_s1 = (__v8si)zeroes;
-                         __v8si v_s1_sums = (__v8si)zeroes;
-                         __v8si v_s2 = (__v8si)zeroes;
-                         STATIC_ASSERT(ALIGNMENT_REQUIRED == 32 && BYTES_PER_ITERATION == 32);
-                         do {
-                                 __m256i bytes = *(const __m256i *)p;
-                                 __v16hi sums = (__v16hi)_mm256_maddubs_epi16(
-                                                                 bytes, (__m256i)multipliers);
-                                 v_s1_sums += v_s1;
-                                 v_s1 += (__v8si)_mm256_sad_epu8(bytes, zeroes);
-                                 v_s2 += (__v8si)_mm256_madd_epi16((__m256i)sums, (__m256i)ones);
-                         } while ((p += BYTES_PER_ITERATION) != chunk_end);
-
-                         v_s1 = (__v8si)_mm256_hadd_epi32((__m256i)v_s1, zeroes);
-                         v_s1 = (__v8si)_mm256_hadd_epi32((__m256i)v_s1, zeroes);
-                         s1 += v_s1[0] + v_s1[4];
-
-                         v_s2 += (__v8si)_mm256_slli_epi32((__m256i)v_s1_sums, 5);
-                         v_s2 = (__v8si)_mm256_hadd_epi32((__m256i)v_s2, zeroes);
-                         v_s2 = (__v8si)_mm256_hadd_epi32((__m256i)v_s2, zeroes);
-                         s2 += v_s2[0] + v_s2[4];
-
- #elif TARGET == TARGET_SSE2
-                         /* SSE2 implementation */
-                         const __m128i zeroes = _mm_setzero_si128();
-
-                         /* s1 counters: 32-bit, sum of bytes */
-                         __v4si v_s1 = (__v4si)zeroes;
-
-                         /* s2 counters: 32-bit, sum of s1 values */
-                         __v4si v_s2 = (__v4si)zeroes;
-
-                         /*
-                          * Thirty-two 16-bit counters for byte sums. Each accumulates
-                          * the bytes that eventually need to be multiplied by a number
-                          * 32...1 for addition into s2.
-                          */
-                         __v8hi v_byte_sums_a = (__v8hi)zeroes;
-                         __v8hi v_byte_sums_b = (__v8hi)zeroes;
-                         __v8hi v_byte_sums_c = (__v8hi)zeroes;
-                         __v8hi v_byte_sums_d = (__v8hi)zeroes;
-
-                         STATIC_ASSERT(ALIGNMENT_REQUIRED == 16 && BYTES_PER_ITERATION == 32);
-                         do {
-                                 /* Load the next 32 bytes. */
-                                 const __m128i bytes1 = *(const __m128i *)p;
-                                 const __m128i bytes2 = *(const __m128i *)(p + 16);
-
-                                 /*
-                                  * Accumulate the previous s1 counters into the s2
-                                  * counters. Logically, this really should be
-                                  * v_s2 += v_s1 * BYTES_PER_ITERATION, but we can do the
-                                  * multiplication (or left shift) later.
-                                  */
-                                 v_s2 += v_s1;
-
-                                 /*
-                                  * s1 update: use "Packed Sum of Absolute Differences"
-                                  * to add the bytes horizontally with 8 bytes per sum.
-                                  * Then add the sums to the s1 counters.
-                                  */
-                                 v_s1 += (__v4si)_mm_sad_epu8(bytes1, zeroes);
-                                 v_s1 += (__v4si)_mm_sad_epu8(bytes2, zeroes);
-
-                                 /*
-                                  * Also accumulate the bytes into 32 separate counters
-                                  * that have 16-bit precision.
-                                  */
-                                 v_byte_sums_a += (__v8hi)_mm_unpacklo_epi8(bytes1, zeroes);
-                                 v_byte_sums_b += (__v8hi)_mm_unpackhi_epi8(bytes1, zeroes);
-                                 v_byte_sums_c += (__v8hi)_mm_unpacklo_epi8(bytes2, zeroes);
-                                 v_byte_sums_d += (__v8hi)_mm_unpackhi_epi8(bytes2, zeroes);
-
-                         } while ((p += BYTES_PER_ITERATION) != chunk_end);
-
-                         /* Finish calculating the s2 counters. */
-                         v_s2 = (__v4si)_mm_slli_epi32((__m128i)v_s2, 5);
-                         v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_a,
-                                                        (__m128i)(__v8hi){ 32, 31, 30, 29, 28, 27, 26, 25 });
-                         v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_b,
-                                                        (__m128i)(__v8hi){ 24, 23, 22, 21, 20, 19, 18, 17 });
-                         v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_c,
-                                                        (__m128i)(__v8hi){ 16, 15, 14, 13, 12, 11, 10, 9 });
-                         v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_d,
-                                                        (__m128i)(__v8hi){ 8, 7, 6, 5, 4, 3, 2, 1 });
-
-                         /* Now accumulate what we computed into the real s1 and s2. */
-                         v_s1 += (__v4si)_mm_shuffle_epi32((__m128i)v_s1, 0x31);
-                         v_s1 += (__v4si)_mm_shuffle_epi32((__m128i)v_s1, 0x02);
-                         s1 += _mm_cvtsi128_si32((__m128i)v_s1);
-
-                         v_s2 += (__v4si)_mm_shuffle_epi32((__m128i)v_s2, 0x31);
-                         v_s2 += (__v4si)_mm_shuffle_epi32((__m128i)v_s2, 0x02);
-                         s2 += _mm_cvtsi128_si32((__m128i)v_s2);
-
- #elif TARGET == TARGET_NEON
-                         /* ARM NEON (Advanced SIMD) implementation */
-                         uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 };
-                         uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, 0 };
-                         uint16x8_t v_byte_sums_a = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
-                         uint16x8_t v_byte_sums_b = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
-                         uint16x8_t v_byte_sums_c = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
-                         uint16x8_t v_byte_sums_d = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
-
-                         STATIC_ASSERT(ALIGNMENT_REQUIRED == 16 && BYTES_PER_ITERATION == 32);
-                         do {
-                                 const uint8x16_t bytes1 = *(const uint8x16_t *)p;
-                                 const uint8x16_t bytes2 = *(const uint8x16_t *)(p + 16);
-                                 uint16x8_t tmp;
-
-                                 v_s2 += v_s1;
-
-                                 tmp = vpaddlq_u8(bytes1);
-                                 tmp = vpadalq_u8(tmp, bytes2);
-                                 v_s1 = vpadalq_u16(v_s1, tmp);
-
-                                 v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1));
-                                 v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1));
-                                 v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2));
-                                 v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2));
-
-                         } while ((p += BYTES_PER_ITERATION) != chunk_end);
-
-                         v_s2 = vqshlq_n_u32(v_s2, 5);
-                         v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), (uint16x4_t) { 32, 31, 30, 29 });
-                         v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_a), (uint16x4_t) { 28, 27, 26, 25 });
-                         v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), (uint16x4_t) { 24, 23, 22, 21 });
-                         v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_b), (uint16x4_t) { 20, 19, 18, 17 });
-                         v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), (uint16x4_t) { 16, 15, 14, 13 });
-                         v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_c), (uint16x4_t) { 12, 11, 10, 9 });
-                         v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_d), (uint16x4_t) { 8, 7, 6, 5 });
-                         v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_d), (uint16x4_t) { 4, 3, 2, 1 });
-
-                         s1 += v_s1[0] + v_s1[1] + v_s1[2] + v_s1[3];
-                         s2 += v_s2[0] + v_s2[1] + v_s2[2] + v_s2[3];
- #else
- #  error "BUG: unknown target"
- #endif
-                 }
-
-                 s1 %= DIVISOR;
-                 s2 %= DIVISOR;
-         }
-
-         /* Process any remaining bytes. */
-         if (p != end) {
-                 do {
-                         s1 += *p++;
-                         s2 += s1;
-                 } while (p != end);
-                 s1 %= DIVISOR;
-                 s2 %= DIVISOR;
-         }
-
-         return (s2 << 16) | s1;
- }
-
- #undef FUNCNAME
- #undef TARGET
- #undef ALIGNMENT_REQUIRED
- #undef BYTES_PER_ITERATION
- #undef ATTRIBUTES
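The intrinsics above can obscure the underlying arithmetic, so here is a scalar reference sketch in plain C of the same math. adler32_scalar() is the byte-at-a-time loop from the header comment; adler32_chunked() demonstrates the chunk decomposition the comment describes (per N-byte chunk, s2 += s1*N plus the weighted sum N*b1 + (N-1)*b2 + ... + 1*bN). The function names and the chunk size N = 32 are illustrative only and are not part of libdeflate's API.

#include <stddef.h>
#include <stdint.h>

#define DIVISOR 65521  /* largest prime below 2^16, as in the template above */

/* Byte-at-a-time form of the inner loop shown in the header comment.
 * (Reducing modulo DIVISOR on every byte is slow but obviously correct.) */
static uint32_t adler32_scalar(uint32_t adler, const uint8_t *p, size_t size)
{
        uint32_t s1 = adler & 0xFFFF;
        uint32_t s2 = adler >> 16;

        while (size--) {
                s1 = (s1 + *p++) % DIVISOR;
                s2 = (s2 + s1) % DIVISOR;
        }
        return (s2 << 16) | s1;
}

/* The same checksum computed "the vector way", but in scalar code: for each
 * N-byte chunk, s2 gains s1*N plus the weighted sum N*b1 + (N-1)*b2 + ... +
 * 1*bN, while s1 gains the plain byte sum.  N = 32 here is illustrative. */
static uint32_t adler32_chunked(uint32_t adler, const uint8_t *p, size_t size)
{
        enum { N = 32 };
        uint32_t s1 = adler & 0xFFFF;
        uint32_t s2 = adler >> 16;

        while (size >= N) {
                uint32_t byte_sum = 0, weighted_sum = 0;

                for (int j = 0; j < N; j++) {
                        byte_sum += p[j];
                        weighted_sum += (uint32_t)(N - j) * p[j];
                }
                s2 = (s2 + s1 * N + weighted_sum) % DIVISOR;  /* uses the old s1 */
                s1 = (s1 + byte_sum) % DIVISOR;
                p += N;
                size -= N;
        }
        while (size--) {  /* leftover tail, byte at a time */
                s1 = (s1 + *p++) % DIVISOR;
                s2 = (s2 + s1) % DIVISOR;
        }
        return (s2 << 16) | s1;
}

Both functions return identical results on any input; that equality is exactly the identity that lets the vector code accumulate plain byte sums during the loop and defer the multiplications by N...1 to the end of each chunk.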
data/ext/libdeflate/libdeflate/lib/aligned_malloc.c
@@ -1,57 +0,0 @@
- /*
-  * aligned_malloc.c - aligned memory allocation
-  *
-  * Originally public domain; changes after 2016-09-07 are copyrighted.
-  *
-  * Copyright 2016 Eric Biggers
-  *
-  * Permission is hereby granted, free of charge, to any person
-  * obtaining a copy of this software and associated documentation
-  * files (the "Software"), to deal in the Software without
-  * restriction, including without limitation the rights to use,
-  * copy, modify, merge, publish, distribute, sublicense, and/or sell
-  * copies of the Software, and to permit persons to whom the
-  * Software is furnished to do so, subject to the following
-  * conditions:
-  *
-  * The above copyright notice and this permission notice shall be
-  * included in all copies or substantial portions of the Software.
-  *
-  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-  * OTHER DEALINGS IN THE SOFTWARE.
-  */
-
- /*
-  * This file provides portable aligned memory allocation functions that only
-  * use malloc() and free(). This avoids portability problems with
-  * posix_memalign(), aligned_alloc(), etc.
-  */
-
- #include <stdlib.h>
-
- #include "aligned_malloc.h"
-
- void *
- aligned_malloc(size_t alignment, size_t size)
- {
-         void *ptr = malloc(sizeof(void *) + alignment - 1 + size);
-         if (ptr) {
-                 void *orig_ptr = ptr;
-                 ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
-                 ((void **)ptr)[-1] = orig_ptr;
-         }
-         return ptr;
- }
-
- void
- aligned_free(void *ptr)
- {
-         if (ptr)
-                 free(((void **)ptr)[-1]);
- }
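The scheme above is worth spelling out: aligned_malloc() over-allocates by one pointer plus worst-case padding, rounds the result up to the requested alignment, and stashes malloc()'s original pointer in the slot just below the returned address so aligned_free() can recover it. The sketch below is a self-contained version of the same trick with a usage example; the ALIGN() macro here is a plausible stand-in for the one defined in lib_common.h (assumed to round an address up to a power-of-two alignment), the my_ prefixes avoid claiming the library's names, and main() is purely illustrative.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Round address 'p' up to the next multiple of 'alignment' (a power of two). */
#define ALIGN(p, alignment) (((p) + (alignment) - 1) & ~((uintptr_t)(alignment) - 1))

static void *my_aligned_malloc(size_t alignment, size_t size)
{
        /* Over-allocate: one pointer slot to stash the original block address,
         * plus alignment - 1 bytes of worst-case padding. */
        void *ptr = malloc(sizeof(void *) + alignment - 1 + size);

        if (ptr) {
                void *orig_ptr = ptr;

                /* Round up past the pointer slot to the requested alignment. */
                ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
                /* The slot just below the returned address holds malloc()'s
                 * pointer, so the free path can recover it. */
                ((void **)ptr)[-1] = orig_ptr;
        }
        return ptr;
}

static void my_aligned_free(void *ptr)
{
        if (ptr)
                free(((void **)ptr)[-1]);
}

int main(void)
{
        unsigned char *buf = my_aligned_malloc(64, 4096);

        if (!buf)
                return 1;
        printf("64-byte aligned: %s\n", ((uintptr_t)buf % 64 == 0) ? "yes" : "no");
        my_aligned_free(buf);
        return 0;
}

Note that the mask-based round-up only works for power-of-two alignments, which covers the cache-line and vector alignments a compression library needs.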
data/ext/libdeflate/libdeflate/lib/aligned_malloc.h
@@ -1,13 +0,0 @@
- /*
-  * aligned_malloc.h - aligned memory allocation
-  */
-
- #ifndef LIB_ALIGNED_MALLOC_H
- #define LIB_ALIGNED_MALLOC_H
-
- #include "lib_common.h"
-
- extern void *aligned_malloc(size_t alignment, size_t size);
- extern void aligned_free(void *ptr);
-
- #endif /* LIB_ALIGNED_MALLOC_H */
data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h
@@ -1,357 +0,0 @@
- /*
-  * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
-  *
-  * Originally public domain; changes after 2016-09-07 are copyrighted.
-  *
-  * Copyright 2016 Eric Biggers
-  *
-  * Permission is hereby granted, free of charge, to any person
-  * obtaining a copy of this software and associated documentation
-  * files (the "Software"), to deal in the Software without
-  * restriction, including without limitation the rights to use,
-  * copy, modify, merge, publish, distribute, sublicense, and/or sell
-  * copies of the Software, and to permit persons to whom the
-  * Software is furnished to do so, subject to the following
-  * conditions:
-  *
-  * The above copyright notice and this permission notice shall be
-  * included in all copies or substantial portions of the Software.
-  *
-  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-  * OTHER DEALINGS IN THE SOFTWARE.
-  *
-  * ----------------------------------------------------------------------------
-  *
-  * This is a Binary Trees (bt) based matchfinder.
-  *
-  * The main data structure is a hash table where each hash bucket contains a
-  * binary tree of sequences whose first 4 bytes share the same hash code. Each
-  * sequence is identified by its starting position in the input buffer. Each
-  * binary tree is always sorted such that each left child represents a sequence
-  * lexicographically lesser than its parent and each right child represents a
-  * sequence lexicographically greater than its parent.
-  *
-  * The algorithm processes the input buffer sequentially. At each byte
-  * position, the hash code of the first 4 bytes of the sequence beginning at
-  * that position (the sequence being matched against) is computed. This
-  * identifies the hash bucket to use for that position. Then, a new binary tree
-  * node is created to represent the current sequence. Then, in a single tree
-  * traversal, the hash bucket's binary tree is searched for matches and is
-  * re-rooted at the new node.
-  *
-  * Compared to the simpler algorithm that uses linked lists instead of binary
-  * trees (see hc_matchfinder.h), the binary tree version gains more information
-  * at each node visitation. Ideally, the binary tree version will examine only
-  * 'log(n)' nodes to find the same matches that the linked list version will
-  * find by examining 'n' nodes. In addition, the binary tree version can
-  * examine fewer bytes at each node by taking advantage of the common prefixes
-  * that result from the sort order, whereas the linked list version may have to
-  * examine up to the full length of the match at each node.
-  *
-  * However, it is not always best to use the binary tree version. It requires
-  * nearly twice as much memory as the linked list version, and it takes time to
-  * keep the binary trees sorted, even at positions where the compressor does not
-  * need matches. Generally, when doing fast compression on small buffers,
-  * binary trees are the wrong approach. They are best suited for thorough
-  * compression and/or large buffers.
-  *
-  * ----------------------------------------------------------------------------
-  */
-
-
- #include "matchfinder_common.h"
-
- #define BT_MATCHFINDER_HASH3_ORDER 16
- #define BT_MATCHFINDER_HASH3_WAYS 2
- #define BT_MATCHFINDER_HASH4_ORDER 16
-
- #define BT_MATCHFINDER_TOTAL_HASH_LENGTH \
-         ((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
-          (1UL << BT_MATCHFINDER_HASH4_ORDER))
-
- /* Representation of a match found by the bt_matchfinder */
- struct lz_match {
-
-         /* The number of bytes matched. */
-         u16 length;
-
-         /* The offset back from the current position that was matched. */
-         u16 offset;
- };
-
- struct bt_matchfinder {
-
-         /* The hash table for finding length 3 matches */
-         mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
-
-         /* The hash table which contains the roots of the binary trees for
-          * finding length 4+ matches */
-         mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
-
-         /* The child node references for the binary trees. The left and right
-          * children of the node for the sequence with position 'pos' are
-          * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */
-         mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
-
- }
- #ifdef _aligned_attribute
- _aligned_attribute(MATCHFINDER_ALIGNMENT)
- #endif
- ;
-
- /* Prepare the matchfinder for a new input buffer. */
- static forceinline void
- bt_matchfinder_init(struct bt_matchfinder *mf)
- {
-         matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_LENGTH);
- }
-
- static forceinline void
- bt_matchfinder_slide_window(struct bt_matchfinder *mf)
- {
-         matchfinder_rebase((mf_pos_t *)mf,
-                            sizeof(struct bt_matchfinder) / sizeof(mf_pos_t));
- }
-
- static forceinline mf_pos_t *
- bt_left_child(struct bt_matchfinder *mf, s32 node)
- {
-         return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
- }
-
- static forceinline mf_pos_t *
- bt_right_child(struct bt_matchfinder *mf, s32 node)
- {
-         return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
- }
-
- /* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
-  * and bt_matchfinder_skip_position(). There must be sufficiently many bytes
-  * remaining to load a 32-bit integer from the *next* position. */
- #define BT_MATCHFINDER_REQUIRED_NBYTES 5
-
- /* Advance the binary tree matchfinder by one byte, optionally recording
-  * matches. @record_matches should be a compile-time constant. */
- static forceinline struct lz_match *
- bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
-                                 const u8 * const restrict in_base,
-                                 const ptrdiff_t cur_pos,
-                                 const u32 max_len,
-                                 const u32 nice_len,
-                                 const u32 max_search_depth,
-                                 u32 * const restrict next_hashes,
-                                 u32 * const restrict best_len_ret,
-                                 struct lz_match * restrict lz_matchptr,
-                                 const bool record_matches)
- {
-         const u8 *in_next = in_base + cur_pos;
-         u32 depth_remaining = max_search_depth;
-         const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
-         u32 next_seq4;
-         u32 next_seq3;
-         u32 hash3;
-         u32 hash4;
-         s32 cur_node;
- #if BT_MATCHFINDER_HASH3_WAYS >= 2
-         s32 cur_node_2;
- #endif
-         const u8 *matchptr;
-         mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
-         u32 best_lt_len, best_gt_len;
-         u32 len;
-         u32 best_len = 3;
-
-         STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
-                       BT_MATCHFINDER_HASH3_WAYS <= 2);
-
-         next_seq4 = load_u32_unaligned(in_next + 1);
-         next_seq3 = loaded_u32_to_u24(next_seq4);
-
-         hash3 = next_hashes[0];
-         hash4 = next_hashes[1];
-
-         next_hashes[0] = lz_hash(next_seq3, BT_MATCHFINDER_HASH3_ORDER);
-         next_hashes[1] = lz_hash(next_seq4, BT_MATCHFINDER_HASH4_ORDER);
-         prefetchw(&mf->hash3_tab[next_hashes[0]]);
-         prefetchw(&mf->hash4_tab[next_hashes[1]]);
-
-         cur_node = mf->hash3_tab[hash3][0];
-         mf->hash3_tab[hash3][0] = cur_pos;
- #if BT_MATCHFINDER_HASH3_WAYS >= 2
-         cur_node_2 = mf->hash3_tab[hash3][1];
-         mf->hash3_tab[hash3][1] = cur_node;
- #endif
-         if (record_matches && cur_node > cutoff) {
-                 u32 seq3 = load_u24_unaligned(in_next);
-                 if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
-                         lz_matchptr->length = 3;
-                         lz_matchptr->offset = in_next - &in_base[cur_node];
-                         lz_matchptr++;
-                 }
- #if BT_MATCHFINDER_HASH3_WAYS >= 2
-                 else if (cur_node_2 > cutoff &&
-                          seq3 == load_u24_unaligned(&in_base[cur_node_2]))
-                 {
-                         lz_matchptr->length = 3;
-                         lz_matchptr->offset = in_next - &in_base[cur_node_2];
-                         lz_matchptr++;
-                 }
- #endif
-         }
-
-         cur_node = mf->hash4_tab[hash4];
-         mf->hash4_tab[hash4] = cur_pos;
-
-         pending_lt_ptr = bt_left_child(mf, cur_pos);
-         pending_gt_ptr = bt_right_child(mf, cur_pos);
-
-         if (cur_node <= cutoff) {
-                 *pending_lt_ptr = MATCHFINDER_INITVAL;
-                 *pending_gt_ptr = MATCHFINDER_INITVAL;
-                 *best_len_ret = best_len;
-                 return lz_matchptr;
-         }
-
-         best_lt_len = 0;
-         best_gt_len = 0;
-         len = 0;
-
-         for (;;) {
-                 matchptr = &in_base[cur_node];
-
-                 if (matchptr[len] == in_next[len]) {
-                         len = lz_extend(in_next, matchptr, len + 1, max_len);
-                         if (!record_matches || len > best_len) {
-                                 if (record_matches) {
-                                         best_len = len;
-                                         lz_matchptr->length = len;
-                                         lz_matchptr->offset = in_next - matchptr;
-                                         lz_matchptr++;
-                                 }
-                                 if (len >= nice_len) {
-                                         *pending_lt_ptr = *bt_left_child(mf, cur_node);
-                                         *pending_gt_ptr = *bt_right_child(mf, cur_node);
-                                         *best_len_ret = best_len;
-                                         return lz_matchptr;
-                                 }
-                         }
-                 }
-
-                 if (matchptr[len] < in_next[len]) {
-                         *pending_lt_ptr = cur_node;
-                         pending_lt_ptr = bt_right_child(mf, cur_node);
-                         cur_node = *pending_lt_ptr;
-                         best_lt_len = len;
-                         if (best_gt_len < len)
-                                 len = best_gt_len;
-                 } else {
-                         *pending_gt_ptr = cur_node;
-                         pending_gt_ptr = bt_left_child(mf, cur_node);
-                         cur_node = *pending_gt_ptr;
-                         best_gt_len = len;
-                         if (best_lt_len < len)
-                                 len = best_lt_len;
-                 }
-
-                 if (cur_node <= cutoff || !--depth_remaining) {
-                         *pending_lt_ptr = MATCHFINDER_INITVAL;
-                         *pending_gt_ptr = MATCHFINDER_INITVAL;
-                         *best_len_ret = best_len;
-                         return lz_matchptr;
-                 }
-         }
- }
-
- /*
-  * Retrieve a list of matches with the current position.
-  *
-  * @mf
-  *         The matchfinder structure.
-  * @in_base
-  *         Pointer to the next byte in the input buffer to process _at the last
-  *         time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
-  * @cur_pos
-  *         The current position in the input buffer relative to @in_base (the
-  *         position of the sequence being matched against).
-  * @max_len
-  *         The maximum permissible match length at this position. Must be >=
-  *         BT_MATCHFINDER_REQUIRED_NBYTES.
-  * @nice_len
-  *         Stop searching if a match of at least this length is found.
-  *         Must be <= @max_len.
-  * @max_search_depth
-  *         Limit on the number of potential matches to consider. Must be >= 1.
-  * @next_hashes
-  *         The precomputed hash codes for the sequence beginning at @in_next.
-  *         These will be used and then updated with the precomputed hashcodes for
-  *         the sequence beginning at @in_next + 1.
-  * @best_len_ret
-  *         If a match of length >= 4 was found, then the length of the longest such
-  *         match is written here; otherwise 3 is written here. (Note: this is
-  *         redundant with the 'struct lz_match' array, but this is easier for the
-  *         compiler to optimize when inlined and the caller immediately does a
-  *         check against 'best_len'.)
-  * @lz_matchptr
-  *         An array in which this function will record the matches. The recorded
-  *         matches will be sorted by strictly increasing length and (non-strictly)
-  *         increasing offset. The maximum number of matches that may be found is
-  *         'nice_len - 2'.
-  *
-  * The return value is a pointer to the next available slot in the @lz_matchptr
-  * array. (If no matches were found, this will be the same as @lz_matchptr.)
-  */
- static forceinline struct lz_match *
- bt_matchfinder_get_matches(struct bt_matchfinder *mf,
-                            const u8 *in_base,
-                            ptrdiff_t cur_pos,
-                            u32 max_len,
-                            u32 nice_len,
-                            u32 max_search_depth,
-                            u32 next_hashes[2],
-                            u32 *best_len_ret,
-                            struct lz_match *lz_matchptr)
- {
-         return bt_matchfinder_advance_one_byte(mf,
-                                                in_base,
-                                                cur_pos,
-                                                max_len,
-                                                nice_len,
-                                                max_search_depth,
-                                                next_hashes,
-                                                best_len_ret,
-                                                lz_matchptr,
-                                                true);
- }
-
- /*
-  * Advance the matchfinder, but don't record any matches.
-  *
-  * This is very similar to bt_matchfinder_get_matches() because both functions
-  * must do hashing and tree re-rooting.
-  */
- static forceinline void
- bt_matchfinder_skip_position(struct bt_matchfinder *mf,
-                              const u8 *in_base,
-                              ptrdiff_t cur_pos,
-                              u32 nice_len,
-                              u32 max_search_depth,
-                              u32 next_hashes[2])
- {
-         u32 best_len;
-         bt_matchfinder_advance_one_byte(mf,
-                                         in_base,
-                                         cur_pos,
-                                         nice_len,
-                                         nice_len,
-                                         max_search_depth,
-                                         next_hashes,
-                                         &best_len,
-                                         NULL,
-                                         false);
- }
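To make the "search and re-root in a single traversal" idea concrete, here is a hypothetical, stripped-down sketch: one tree instead of a hash table of trees, a whole-sequence memcmp() instead of incremental match extension, and no match recording, window sliding, or best_lt_len/best_gt_len common-prefix optimization. The child-array layout (left child at 2*pos, right at 2*pos + 1) mirrors child_tab above; the names toy_insert and WINDOW_SIZE = 4096 are illustrative only.

#include <stdint.h>
#include <string.h>

#define WINDOW_SIZE 4096  /* toy value; stands in for MATCHFINDER_WINDOW_SIZE */

static int32_t root = -1;               /* the newest position is always the root */
static int32_t child[2 * WINDOW_SIZE];  /* child[2*pos] = left, child[2*pos+1] = right;
                                         * -1 marks "no child"; every slot is written
                                         * before it is ever read */

/* Make position 'pos' (with pos < n) of buf[0..n) the new root, partitioning
 * the old tree around the sequence starting at 'pos' in one downward pass:
 * lexicographically lesser nodes are threaded onto the new root's left spine,
 * greater nodes onto its right spine, exactly like the pending_lt_ptr /
 * pending_gt_ptr logic above.  Call with pos = 0, 1, 2, ... in order. */
static void toy_insert(const uint8_t *buf, size_t n, int32_t pos)
{
        int32_t *pending_lt = &child[2 * pos];      /* next free slot, left spine */
        int32_t *pending_gt = &child[2 * pos + 1];  /* next free slot, right spine */
        int32_t cur = root;

        root = pos;
        while (cur >= 0) {
                /* Compare the older sequence to the new one (toy: full memcmp;
                 * the real code extends a match length incrementally instead,
                 * and would record any match found here). */
                if (memcmp(&buf[cur], &buf[pos], n - (size_t)pos) < 0) {
                        *pending_lt = cur;                /* belongs left of the new root */
                        pending_lt = &child[2 * cur + 1]; /* continue toward larger keys */
                        cur = *pending_lt;
                } else {
                        *pending_gt = cur;                /* belongs right of the new root */
                        pending_gt = &child[2 * cur];     /* continue toward smaller keys */
                        cur = *pending_gt;
                }
        }
        *pending_lt = -1;  /* terminate both spines */
        *pending_gt = -1;
}

Inserting positions in order keeps the invariant that the most recent position is the root, which is why the real matchfinder can overwrite hash4_tab[hash4] with cur_pos before the traversal even begins: the traversal itself rebuilds the rest of the tree beneath the new root while simultaneously visiting each candidate match.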