zstd-ruby 1.4.4.0 → 1.5.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
@@ -0,0 +1,571 @@
|
|
1
|
+
#include "../common/portability_macros.h"
|
2
|
+
|
3
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
4
|
+
|
5
|
+
/* Stack marking
|
6
|
+
* ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
|
7
|
+
*/
|
8
|
+
#if defined(__linux__) && defined(__ELF__)
|
9
|
+
.section .note.GNU-stack,"",%progbits
|
10
|
+
#endif
|
11
|
+
|
12
|
+
/* Calling convention:
|
13
|
+
*
|
14
|
+
* %rdi contains the first argument: HUF_DecompressAsmArgs*.
|
15
|
+
* %rbp isn't maintained (no frame pointer).
|
16
|
+
* %rsp contains the stack pointer that grows down.
|
17
|
+
* No red-zone is assumed, only addresses >= %rsp are used.
|
18
|
+
* All register contents are preserved.
|
19
|
+
*
|
20
|
+
* TODO: Support Windows calling convention.
|
21
|
+
*/
|
22
|
+
|
23
|
+
.global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
|
24
|
+
.global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
|
25
|
+
.global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
|
26
|
+
.global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
|
27
|
+
.text
|
28
|
+
|
29
|
+
/* Sets up register mappings for clarity.
|
30
|
+
* op[], bits[], dtable & ip[0] each get their own register.
|
31
|
+
* ip[1,2,3] & olimit alias var[].
|
32
|
+
* %rax is a scratch register.
|
33
|
+
*/
|
34
|
+
|
35
|
+
#define op0 rsi
|
36
|
+
#define op1 rbx
|
37
|
+
#define op2 rcx
|
38
|
+
#define op3 rdi
|
39
|
+
|
40
|
+
#define ip0 r8
|
41
|
+
#define ip1 r9
|
42
|
+
#define ip2 r10
|
43
|
+
#define ip3 r11
|
44
|
+
|
45
|
+
#define bits0 rbp
|
46
|
+
#define bits1 rdx
|
47
|
+
#define bits2 r12
|
48
|
+
#define bits3 r13
|
49
|
+
#define dtable r14
|
50
|
+
#define olimit r15
|
51
|
+
|
52
|
+
/* var[] aliases ip[1,2,3] & olimit
|
53
|
+
* ip[1,2,3] are saved every iteration.
|
54
|
+
* olimit is only used in compute_olimit.
|
55
|
+
*/
|
56
|
+
#define var0 r15
|
57
|
+
#define var1 r9
|
58
|
+
#define var2 r10
|
59
|
+
#define var3 r11
|
60
|
+
|
61
|
+
/* 32-bit var registers */
|
62
|
+
#define vard0 r15d
|
63
|
+
#define vard1 r9d
|
64
|
+
#define vard2 r10d
|
65
|
+
#define vard3 r11d
|
66
|
+
|
67
|
+
/* Calls X(N) for each stream 0, 1, 2, 3. */
|
68
|
+
#define FOR_EACH_STREAM(X) \
|
69
|
+
X(0); \
|
70
|
+
X(1); \
|
71
|
+
X(2); \
|
72
|
+
X(3)
|
73
|
+
|
74
|
+
/* Calls X(N, idx) for each stream 0, 1, 2, 3. */
|
75
|
+
#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
|
76
|
+
X(0, idx); \
|
77
|
+
X(1, idx); \
|
78
|
+
X(2, idx); \
|
79
|
+
X(3, idx)
|
80
|
+
|
81
|
+
/* Define both _HUF_* & HUF_* symbols because MacOS
|
82
|
+
* C symbols are prefixed with '_' & Linux symbols aren't.
|
83
|
+
*/
|
84
|
+
_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
|
85
|
+
HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
|
86
|
+
/* Save all registers - even if they are callee saved for simplicity. */
|
87
|
+
push %rax
|
88
|
+
push %rbx
|
89
|
+
push %rcx
|
90
|
+
push %rdx
|
91
|
+
push %rbp
|
92
|
+
push %rsi
|
93
|
+
push %rdi
|
94
|
+
push %r8
|
95
|
+
push %r9
|
96
|
+
push %r10
|
97
|
+
push %r11
|
98
|
+
push %r12
|
99
|
+
push %r13
|
100
|
+
push %r14
|
101
|
+
push %r15
|
102
|
+
|
103
|
+
/* Read HUF_DecompressAsmArgs* args through %rax: the pointer arrives in %rdi, which is reused as op3 below */
|
104
|
+
movq %rdi, %rax
|
105
|
+
movq 0(%rax), %ip0
|
106
|
+
movq 8(%rax), %ip1
|
107
|
+
movq 16(%rax), %ip2
|
108
|
+
movq 24(%rax), %ip3
|
109
|
+
movq 32(%rax), %op0
|
110
|
+
movq 40(%rax), %op1
|
111
|
+
movq 48(%rax), %op2
|
112
|
+
movq 56(%rax), %op3
|
113
|
+
movq 64(%rax), %bits0
|
114
|
+
movq 72(%rax), %bits1
|
115
|
+
movq 80(%rax), %bits2
|
116
|
+
movq 88(%rax), %bits3
|
117
|
+
movq 96(%rax), %dtable
|
118
|
+
push %rax /* argument */
|
119
|
+
push 104(%rax) /* ilimit */
|
120
|
+
push 112(%rax) /* oend */
|
121
|
+
push %olimit /* olimit space */
|
122
|
+
|
123
|
+
subq $24, %rsp
|
124
|
+
|
125
|
+
.L_4X1_compute_olimit:
|
126
|
+
/* Computes how many iterations we can do safely
|
127
|
+
* %r15, %rax may be clobbered
|
128
|
+
* rbx, rdx must be saved
|
129
|
+
* op3 & ip0 mustn't be clobbered
|
130
|
+
*/
|
131
|
+
movq %rbx, 0(%rsp)
|
132
|
+
movq %rdx, 8(%rsp)
|
133
|
+
|
134
|
+
movq 32(%rsp), %rax /* rax = oend */
|
135
|
+
subq %op3, %rax /* rax = oend - op3 */
|
136
|
+
|
137
|
+
/* r15 = (oend - op3) / 5 */
|
138
|
+
movabsq $-3689348814741910323, %rdx
|
139
|
+
mulq %rdx
|
140
|
+
movq %rdx, %r15
|
141
|
+
shrq $2, %r15
|
142
|
+
|
143
|
+
movq %ip0, %rax /* rax = ip0 */
|
144
|
+
movq 40(%rsp), %rdx /* rdx = ilimit */
|
145
|
+
subq %rdx, %rax /* rax = ip0 - ilimit */
|
146
|
+
movq %rax, %rbx /* rbx = ip0 - ilimit */
|
147
|
+
|
148
|
+
/* rdx = (ip0 - ilimit) / 7 */
|
149
|
+
movabsq $2635249153387078803, %rdx
|
150
|
+
mulq %rdx
|
151
|
+
subq %rdx, %rbx
|
152
|
+
shrq %rbx
|
153
|
+
addq %rbx, %rdx
|
154
|
+
shrq $2, %rdx
|
155
|
+
|
156
|
+
/* r15 = min(%rdx, %r15) */
|
157
|
+
cmpq %rdx, %r15
|
158
|
+
cmova %rdx, %r15
|
159
|
+
|
160
|
+
/* r15 = r15 * 5 */
|
161
|
+
leaq (%r15, %r15, 4), %r15
|
162
|
+
|
163
|
+
/* olimit = op3 + r15 */
|
164
|
+
addq %op3, %olimit
|
165
|
+
|
166
|
+
movq 8(%rsp), %rdx
|
167
|
+
movq 0(%rsp), %rbx
|
168
|
+
|
169
|
+
/* If (op3 + 20 > olimit) */
|
170
|
+
movq %op3, %rax /* rax = op3 */
|
171
|
+
addq $20, %rax /* rax = op3 + 20 */
|
172
|
+
cmpq %rax, %olimit /* op3 + 20 > olimit */
|
173
|
+
jb .L_4X1_exit
|
174
|
+
|
175
|
+
/* If (ip1 < ip0) go to exit */
|
176
|
+
cmpq %ip0, %ip1
|
177
|
+
jb .L_4X1_exit
|
178
|
+
|
179
|
+
/* If (ip2 < ip1) go to exit */
|
180
|
+
cmpq %ip1, %ip2
|
181
|
+
jb .L_4X1_exit
|
182
|
+
|
183
|
+
/* If (ip3 < ip2) go to exit */
|
184
|
+
cmpq %ip2, %ip3
|
185
|
+
jb .L_4X1_exit
|
186
|
+
|
187
|
+
/* Reads top 11 bits from bits[n]
|
188
|
+
* Loads dt[bits[n]] into var[n]
|
189
|
+
*/
|
190
|
+
#define GET_NEXT_DELT(n) \
|
191
|
+
movq $53, %var##n; \
|
192
|
+
shrxq %var##n, %bits##n, %var##n; \
|
193
|
+
movzwl (%dtable,%var##n,2),%vard##n
|
194
|
+
|
195
|
+
/* var[n] must contain the DTable entry computed with GET_NEXT_DELT
|
196
|
+
* Moves var[n] to %rax
|
197
|
+
* bits[n] <<= var[n] & 63
|
198
|
+
* op[n][idx] = %rax >> 8
|
199
|
+
* %ah is a way to access bits [8, 16) of %rax
|
200
|
+
*/
|
201
|
+
#define DECODE_FROM_DELT(n, idx) \
|
202
|
+
movq %var##n, %rax; \
|
203
|
+
shlxq %var##n, %bits##n, %bits##n; \
|
204
|
+
movb %ah, idx(%op##n)
|
205
|
+
|
206
|
+
/* Assumes GET_NEXT_DELT has been called.
|
207
|
+
* Calls DECODE_FROM_DELT then GET_NEXT_DELT
|
208
|
+
*/
|
209
|
+
#define DECODE_AND_GET_NEXT(n, idx) \
|
210
|
+
DECODE_FROM_DELT(n, idx); \
|
211
|
+
GET_NEXT_DELT(n) \
|
212
|
+
|
213
|
+
/* // ctz & nbBytes is stored in bits[n]
|
214
|
+
* // nbBits is stored in %rax
|
215
|
+
* ctz = CTZ[bits[n]]
|
216
|
+
* nbBits = ctz & 7
|
217
|
+
* nbBytes = ctz >> 3
|
218
|
+
* op[n] += 5
|
219
|
+
* ip[n] -= nbBytes
|
220
|
+
* // Note: x86-64 is little-endian ==> no bswap
|
221
|
+
* bits[n] = MEM_readST(ip[n]) | 1
|
222
|
+
* bits[n] <<= nbBits
|
223
|
+
*/
|
224
|
+
#define RELOAD_BITS(n) \
|
225
|
+
bsfq %bits##n, %bits##n; \
|
226
|
+
movq %bits##n, %rax; \
|
227
|
+
andq $7, %rax; \
|
228
|
+
shrq $3, %bits##n; \
|
229
|
+
leaq 5(%op##n), %op##n; \
|
230
|
+
subq %bits##n, %ip##n; \
|
231
|
+
movq (%ip##n), %bits##n; \
|
232
|
+
orq $1, %bits##n; \
|
233
|
+
shlx %rax, %bits##n, %bits##n
|
234
|
+
|
235
|
+
/* Store clobbered variables on the stack */
|
236
|
+
movq %olimit, 24(%rsp)
|
237
|
+
movq %ip1, 0(%rsp)
|
238
|
+
movq %ip2, 8(%rsp)
|
239
|
+
movq %ip3, 16(%rsp)
|
240
|
+
|
241
|
+
/* Call GET_NEXT_DELT for each stream */
|
242
|
+
FOR_EACH_STREAM(GET_NEXT_DELT)
|
243
|
+
|
244
|
+
.p2align 6
|
245
|
+
|
246
|
+
.L_4X1_loop_body:
|
247
|
+
/* Decode 5 symbols in each of the 4 streams (20 total)
|
248
|
+
* Must have called GET_NEXT_DELT for each stream
|
249
|
+
*/
|
250
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
|
251
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
|
252
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
|
253
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
|
254
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
|
255
|
+
|
256
|
+
/* Load ip[1,2,3] from stack (var[] aliases them)
|
257
|
+
* ip[] is needed for RELOAD_BITS
|
258
|
+
* Each will be stored back to the stack after RELOAD
|
259
|
+
*/
|
260
|
+
movq 0(%rsp), %ip1
|
261
|
+
movq 8(%rsp), %ip2
|
262
|
+
movq 16(%rsp), %ip3
|
263
|
+
|
264
|
+
/* Reload each stream & fetch the next table entry
|
265
|
+
* to prepare for the next iteration
|
266
|
+
*/
|
267
|
+
RELOAD_BITS(0)
|
268
|
+
GET_NEXT_DELT(0)
|
269
|
+
|
270
|
+
RELOAD_BITS(1)
|
271
|
+
movq %ip1, 0(%rsp)
|
272
|
+
GET_NEXT_DELT(1)
|
273
|
+
|
274
|
+
RELOAD_BITS(2)
|
275
|
+
movq %ip2, 8(%rsp)
|
276
|
+
GET_NEXT_DELT(2)
|
277
|
+
|
278
|
+
RELOAD_BITS(3)
|
279
|
+
movq %ip3, 16(%rsp)
|
280
|
+
GET_NEXT_DELT(3)
|
281
|
+
|
282
|
+
/* If op3 < olimit: continue the loop */
|
283
|
+
cmp %op3, 24(%rsp)
|
284
|
+
ja .L_4X1_loop_body
|
285
|
+
|
286
|
+
/* Reload ip[1,2,3] from stack */
|
287
|
+
movq 0(%rsp), %ip1
|
288
|
+
movq 8(%rsp), %ip2
|
289
|
+
movq 16(%rsp), %ip3
|
290
|
+
|
291
|
+
/* Re-compute olimit */
|
292
|
+
jmp .L_4X1_compute_olimit
|
293
|
+
|
294
|
+
#undef GET_NEXT_DELT
|
295
|
+
#undef DECODE_FROM_DELT
|
296
|
+
#undef DECODE
|
297
|
+
#undef RELOAD_BITS
|
298
|
+
.L_4X1_exit:
|
299
|
+
addq $24, %rsp
|
300
|
+
|
301
|
+
/* Restore stack (pop olimit, oend, ilimit & arg; arg is left in %rax) */
|
302
|
+
pop %rax /* olimit */
|
303
|
+
pop %rax /* oend */
|
304
|
+
pop %rax /* ilimit */
|
305
|
+
pop %rax /* arg */
|
306
|
+
|
307
|
+
/* Save ip / op / bits */
|
308
|
+
movq %ip0, 0(%rax)
|
309
|
+
movq %ip1, 8(%rax)
|
310
|
+
movq %ip2, 16(%rax)
|
311
|
+
movq %ip3, 24(%rax)
|
312
|
+
movq %op0, 32(%rax)
|
313
|
+
movq %op1, 40(%rax)
|
314
|
+
movq %op2, 48(%rax)
|
315
|
+
movq %op3, 56(%rax)
|
316
|
+
movq %bits0, 64(%rax)
|
317
|
+
movq %bits1, 72(%rax)
|
318
|
+
movq %bits2, 80(%rax)
|
319
|
+
movq %bits3, 88(%rax)
|
320
|
+
|
321
|
+
/* Restore registers */
|
322
|
+
pop %r15
|
323
|
+
pop %r14
|
324
|
+
pop %r13
|
325
|
+
pop %r12
|
326
|
+
pop %r11
|
327
|
+
pop %r10
|
328
|
+
pop %r9
|
329
|
+
pop %r8
|
330
|
+
pop %rdi
|
331
|
+
pop %rsi
|
332
|
+
pop %rbp
|
333
|
+
pop %rdx
|
334
|
+
pop %rcx
|
335
|
+
pop %rbx
|
336
|
+
pop %rax
|
337
|
+
ret
|
338
|
+
|
339
|
+
_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
|
340
|
+
HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
|
341
|
+
/* Save all registers - even if they are callee saved for simplicity. */
|
342
|
+
push %rax
|
343
|
+
push %rbx
|
344
|
+
push %rcx
|
345
|
+
push %rdx
|
346
|
+
push %rbp
|
347
|
+
push %rsi
|
348
|
+
push %rdi
|
349
|
+
push %r8
|
350
|
+
push %r9
|
351
|
+
push %r10
|
352
|
+
push %r11
|
353
|
+
push %r12
|
354
|
+
push %r13
|
355
|
+
push %r14
|
356
|
+
push %r15
|
357
|
+
|
358
|
+
movq %rdi, %rax
|
359
|
+
movq 0(%rax), %ip0
|
360
|
+
movq 8(%rax), %ip1
|
361
|
+
movq 16(%rax), %ip2
|
362
|
+
movq 24(%rax), %ip3
|
363
|
+
movq 32(%rax), %op0
|
364
|
+
movq 40(%rax), %op1
|
365
|
+
movq 48(%rax), %op2
|
366
|
+
movq 56(%rax), %op3
|
367
|
+
movq 64(%rax), %bits0
|
368
|
+
movq 72(%rax), %bits1
|
369
|
+
movq 80(%rax), %bits2
|
370
|
+
movq 88(%rax), %bits3
|
371
|
+
movq 96(%rax), %dtable
|
372
|
+
push %rax /* argument */
|
373
|
+
push %rax /* olimit */
|
374
|
+
push 104(%rax) /* ilimit */
|
375
|
+
|
376
|
+
movq 112(%rax), %rax
|
377
|
+
push %rax /* oend3 */
|
378
|
+
|
379
|
+
movq %op3, %rax
|
380
|
+
push %rax /* oend2 */
|
381
|
+
|
382
|
+
movq %op2, %rax
|
383
|
+
push %rax /* oend1 */
|
384
|
+
|
385
|
+
movq %op1, %rax
|
386
|
+
push %rax /* oend0 */
|
387
|
+
|
388
|
+
/* Scratch space */
|
389
|
+
subq $8, %rsp
|
390
|
+
|
391
|
+
.L_4X2_compute_olimit:
|
392
|
+
/* Computes how many iterations we can do safely
|
393
|
+
* %r15, %rax may be clobbered
|
394
|
+
* rdx must be saved
|
395
|
+
* op[0,1,2,3] & ip0 mustn't be clobbered
|
396
|
+
*/
|
397
|
+
movq %rdx, 0(%rsp)
|
398
|
+
|
399
|
+
/* We can consume up to 7 input bytes each iteration. */
|
400
|
+
movq %ip0, %rax /* rax = ip0 */
|
401
|
+
movq 40(%rsp), %rdx /* rdx = ilimit */
|
402
|
+
subq %rdx, %rax /* rax = ip0 - ilimit */
|
403
|
+
movq %rax, %r15 /* r15 = ip0 - ilimit */
|
404
|
+
|
405
|
+
/* rdx = rax / 7 */
|
406
|
+
movabsq $2635249153387078803, %rdx
|
407
|
+
mulq %rdx
|
408
|
+
subq %rdx, %r15
|
409
|
+
shrq %r15
|
410
|
+
addq %r15, %rdx
|
411
|
+
shrq $2, %rdx
|
412
|
+
|
413
|
+
/* r15 = (ip0 - ilimit) / 7 */
|
414
|
+
movq %rdx, %r15
|
415
|
+
|
416
|
+
movabsq $-3689348814741910323, %rdx
|
417
|
+
movq 8(%rsp), %rax /* rax = oend0 */
|
418
|
+
subq %op0, %rax /* rax = oend0 - op0 */
|
419
|
+
mulq %rdx
|
420
|
+
shrq $3, %rdx /* rdx = rax / 10 */
|
421
|
+
|
422
|
+
/* r15 = min(%rdx, %r15) */
|
423
|
+
cmpq %rdx, %r15
|
424
|
+
cmova %rdx, %r15
|
425
|
+
|
426
|
+
movabsq $-3689348814741910323, %rdx
|
427
|
+
movq 16(%rsp), %rax /* rax = oend1 */
|
428
|
+
subq %op1, %rax /* rax = oend1 - op1 */
|
429
|
+
mulq %rdx
|
430
|
+
shrq $3, %rdx /* rdx = rax / 10 */
|
431
|
+
|
432
|
+
/* r15 = min(%rdx, %r15) */
|
433
|
+
cmpq %rdx, %r15
|
434
|
+
cmova %rdx, %r15
|
435
|
+
|
436
|
+
movabsq $-3689348814741910323, %rdx
|
437
|
+
movq 24(%rsp), %rax /* rax = oend2 */
|
438
|
+
subq %op2, %rax /* rax = oend2 - op2 */
|
439
|
+
mulq %rdx
|
440
|
+
shrq $3, %rdx /* rdx = rax / 10 */
|
441
|
+
|
442
|
+
/* r15 = min(%rdx, %r15) */
|
443
|
+
cmpq %rdx, %r15
|
444
|
+
cmova %rdx, %r15
|
445
|
+
|
446
|
+
movabsq $-3689348814741910323, %rdx
|
447
|
+
movq 32(%rsp), %rax /* rax = oend3 */
|
448
|
+
subq %op3, %rax /* rax = oend3 - op3 */
|
449
|
+
mulq %rdx
|
450
|
+
shrq $3, %rdx /* rdx = rax / 10 */
|
451
|
+
|
452
|
+
/* r15 = min(%rdx, %r15) */
|
453
|
+
cmpq %rdx, %r15
|
454
|
+
cmova %rdx, %r15
|
455
|
+
|
456
|
+
/* olimit = op3 + 5 * r15 */
|
457
|
+
movq %r15, %rax
|
458
|
+
leaq (%op3, %rax, 4), %olimit
|
459
|
+
addq %rax, %olimit
|
460
|
+
|
461
|
+
movq 0(%rsp), %rdx
|
462
|
+
|
463
|
+
/* If (op3 + 10 > olimit) */
|
464
|
+
movq %op3, %rax /* rax = op3 */
|
465
|
+
addq $10, %rax /* rax = op3 + 10 */
|
466
|
+
cmpq %rax, %olimit /* op3 + 10 > olimit */
|
467
|
+
jb .L_4X2_exit
|
468
|
+
|
469
|
+
/* If (ip1 < ip0) go to exit */
|
470
|
+
cmpq %ip0, %ip1
|
471
|
+
jb .L_4X2_exit
|
472
|
+
|
473
|
+
/* If (ip2 < ip1) go to exit */
|
474
|
+
cmpq %ip1, %ip2
|
475
|
+
jb .L_4X2_exit
|
476
|
+
|
477
|
+
/* If (ip3 < ip2) go to exit */
|
478
|
+
cmpq %ip2, %ip3
|
479
|
+
jb .L_4X2_exit
|
480
|
+
|
481
|
+
#define DECODE(n, idx) \
|
482
|
+
movq %bits##n, %rax; \
|
483
|
+
shrq $53, %rax; \
|
484
|
+
movzwl 0(%dtable,%rax,4),%r8d; \
|
485
|
+
movzbl 2(%dtable,%rax,4),%r15d; \
|
486
|
+
movzbl 3(%dtable,%rax,4),%eax; \
|
487
|
+
movw %r8w, (%op##n); \
|
488
|
+
shlxq %r15, %bits##n, %bits##n; \
|
489
|
+
addq %rax, %op##n
|
490
|
+
|
491
|
+
#define RELOAD_BITS(n) \
|
492
|
+
bsfq %bits##n, %bits##n; \
|
493
|
+
movq %bits##n, %rax; \
|
494
|
+
shrq $3, %bits##n; \
|
495
|
+
andq $7, %rax; \
|
496
|
+
subq %bits##n, %ip##n; \
|
497
|
+
movq (%ip##n), %bits##n; \
|
498
|
+
orq $1, %bits##n; \
|
499
|
+
shlxq %rax, %bits##n, %bits##n
|
500
|
+
|
501
|
+
|
502
|
+
movq %olimit, 48(%rsp)
|
503
|
+
|
504
|
+
.p2align 6
|
505
|
+
|
506
|
+
.L_4X2_loop_body:
|
507
|
+
/* We clobber r8, so store it on the stack */
|
508
|
+
movq %r8, 0(%rsp)
|
509
|
+
|
510
|
+
/* Decode 5 symbols from each of the 4 streams (20 symbols total). */
|
511
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
|
512
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
|
513
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
|
514
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
|
515
|
+
FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
|
516
|
+
|
517
|
+
/* Reload r8 */
|
518
|
+
movq 0(%rsp), %r8
|
519
|
+
|
520
|
+
FOR_EACH_STREAM(RELOAD_BITS)
|
521
|
+
|
522
|
+
cmp %op3, 48(%rsp)
|
523
|
+
ja .L_4X2_loop_body
|
524
|
+
jmp .L_4X2_compute_olimit
|
525
|
+
|
526
|
+
#undef DECODE
|
527
|
+
#undef RELOAD_BITS
|
528
|
+
.L_4X2_exit:
|
529
|
+
addq $8, %rsp
|
530
|
+
/* Restore stack (pop oend[0-3], ilimit, olimit & arg; arg is left in %rax) */
|
531
|
+
pop %rax /* oend0 */
|
532
|
+
pop %rax /* oend1 */
|
533
|
+
pop %rax /* oend2 */
|
534
|
+
pop %rax /* oend3 */
|
535
|
+
pop %rax /* ilimit */
|
536
|
+
pop %rax /* olimit */
|
537
|
+
pop %rax /* arg */
|
538
|
+
|
539
|
+
/* Save ip / op / bits */
|
540
|
+
movq %ip0, 0(%rax)
|
541
|
+
movq %ip1, 8(%rax)
|
542
|
+
movq %ip2, 16(%rax)
|
543
|
+
movq %ip3, 24(%rax)
|
544
|
+
movq %op0, 32(%rax)
|
545
|
+
movq %op1, 40(%rax)
|
546
|
+
movq %op2, 48(%rax)
|
547
|
+
movq %op3, 56(%rax)
|
548
|
+
movq %bits0, 64(%rax)
|
549
|
+
movq %bits1, 72(%rax)
|
550
|
+
movq %bits2, 80(%rax)
|
551
|
+
movq %bits3, 88(%rax)
|
552
|
+
|
553
|
+
/* Restore registers */
|
554
|
+
pop %r15
|
555
|
+
pop %r14
|
556
|
+
pop %r13
|
557
|
+
pop %r12
|
558
|
+
pop %r11
|
559
|
+
pop %r10
|
560
|
+
pop %r9
|
561
|
+
pop %r8
|
562
|
+
pop %rdi
|
563
|
+
pop %rsi
|
564
|
+
pop %rbp
|
565
|
+
pop %rdx
|
566
|
+
pop %rcx
|
567
|
+
pop %rbx
|
568
|
+
pop %rax
|
569
|
+
ret
|
570
|
+
|
571
|
+
#endif
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,18 +14,18 @@
|
|
14
14
|
/*-*******************************************************
|
15
15
|
* Dependencies
|
16
16
|
*********************************************************/
|
17
|
-
#include
|
18
|
-
#include "cpu.h" /* bmi2 */
|
19
|
-
#include "mem.h" /* low level memory routines */
|
17
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
18
|
+
#include "../common/cpu.h" /* bmi2 */
|
19
|
+
#include "../common/mem.h" /* low level memory routines */
|
20
20
|
#define FSE_STATIC_LINKING_ONLY
|
21
|
-
#include "fse.h"
|
21
|
+
#include "../common/fse.h"
|
22
22
|
#define HUF_STATIC_LINKING_ONLY
|
23
|
-
#include "huf.h"
|
23
|
+
#include "../common/huf.h"
|
24
24
|
#include "zstd_decompress_internal.h"
|
25
25
|
#include "zstd_ddict.h"
|
26
26
|
|
27
27
|
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
28
|
-
# include "zstd_legacy.h"
|
28
|
+
# include "../legacy/zstd_legacy.h"
|
29
29
|
#endif
|
30
30
|
|
31
31
|
|
@@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
|
65
65
|
dctx->virtualStart = ddict->dictContent;
|
66
66
|
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
|
67
67
|
dctx->previousDstEnd = dctx->dictEnd;
|
68
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
69
|
+
dctx->dictContentBeginForFuzzing = dctx->prefixStart;
|
70
|
+
dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
|
71
|
+
#endif
|
68
72
|
if (ddict->entropyPresent) {
|
69
73
|
dctx->litEntropy = 1;
|
70
74
|
dctx->fseEntropy = 1;
|
@@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
|
|
107
111
|
/* load entropy tables */
|
108
112
|
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
|
109
113
|
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
|
110
|
-
dictionary_corrupted);
|
114
|
+
dictionary_corrupted, "");
|
111
115
|
ddict->entropyPresent = 1;
|
112
116
|
return 0;
|
113
117
|
}
|
@@ -123,17 +127,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
|
123
127
|
ddict->dictContent = dict;
|
124
128
|
if (!dict) dictSize = 0;
|
125
129
|
} else {
|
126
|
-
void* const internalBuffer =
|
130
|
+
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
|
127
131
|
ddict->dictBuffer = internalBuffer;
|
128
132
|
ddict->dictContent = internalBuffer;
|
129
133
|
if (!internalBuffer) return ERROR(memory_allocation);
|
130
|
-
|
134
|
+
ZSTD_memcpy(internalBuffer, dict, dictSize);
|
131
135
|
}
|
132
136
|
ddict->dictSize = dictSize;
|
133
137
|
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
134
138
|
|
135
139
|
/* parse dictionary content */
|
136
|
-
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
140
|
+
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
|
137
141
|
|
138
142
|
return 0;
|
139
143
|
}
|
@@ -143,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
|
143
147
|
ZSTD_dictContentType_e dictContentType,
|
144
148
|
ZSTD_customMem customMem)
|
145
149
|
{
|
146
|
-
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
150
|
+
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
147
151
|
|
148
|
-
{ ZSTD_DDict* const ddict = (ZSTD_DDict*)
|
152
|
+
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
|
149
153
|
if (ddict == NULL) return NULL;
|
150
154
|
ddict->cMem = customMem;
|
151
155
|
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
|
@@ -194,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
|
|
194
198
|
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
|
195
199
|
if (sBufferSize < neededSpace) return NULL;
|
196
200
|
if (dictLoadMethod == ZSTD_dlm_byCopy) {
|
197
|
-
|
201
|
+
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
|
198
202
|
dict = ddict+1;
|
199
203
|
}
|
200
204
|
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
|
@@ -209,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
|
|
209
213
|
{
|
210
214
|
if (ddict==NULL) return 0; /* support free on NULL */
|
211
215
|
{ ZSTD_customMem const cMem = ddict->cMem;
|
212
|
-
|
213
|
-
|
216
|
+
ZSTD_customFree(ddict->dictBuffer, cMem);
|
217
|
+
ZSTD_customFree(ddict, cMem);
|
214
218
|
return 0;
|
215
219
|
}
|
216
220
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,8 +15,8 @@
|
|
15
15
|
/*-*******************************************************
|
16
16
|
* Dependencies
|
17
17
|
*********************************************************/
|
18
|
-
#include
|
19
|
-
#include "zstd.h" /* ZSTD_DDict, and several public functions */
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
19
|
+
#include "../zstd.h" /* ZSTD_DDict, and several public functions */
|
20
20
|
|
21
21
|
|
22
22
|
/*-*******************************************************
|