zstd-ruby 1.4.4.0 → 1.5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
@@ -0,0 +1,571 @@
1
+ #include "../common/portability_macros.h"
2
+
3
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
4
+
5
+ /* Stack marking
6
+ * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
7
+ */
8
+ #if defined(__linux__) && defined(__ELF__)
9
+ .section .note.GNU-stack,"",%progbits
10
+ #endif
11
+
12
+ /* Calling convention:
13
+ *
14
+ * %rdi contains the first argument: HUF_DecompressAsmArgs*.
15
+ * %rbp isn't maintained (no frame pointer).
16
+ * %rsp contains the stack pointer that grows down.
17
+ * No red-zone is assumed, only addresses >= %rsp are used.
18
+ * All register contents are preserved.
19
+ *
20
+ * TODO: Support Windows calling convention.
21
+ */
22
+
23
+ .global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
24
+ .global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
25
+ .global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
26
+ .global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
27
+ .text
28
+
29
+ /* Sets up register mappings for clarity.
30
+ * op[], bits[], dtable & ip[0] each get their own register.
31
+ * ip[1,2,3] & olimit alias var[].
32
+ * %rax is a scratch register.
33
+ */
34
+
35
+ #define op0 rsi
36
+ #define op1 rbx
37
+ #define op2 rcx
38
+ #define op3 rdi
39
+
40
+ #define ip0 r8
41
+ #define ip1 r9
42
+ #define ip2 r10
43
+ #define ip3 r11
44
+
45
+ #define bits0 rbp
46
+ #define bits1 rdx
47
+ #define bits2 r12
48
+ #define bits3 r13
49
+ #define dtable r14
50
+ #define olimit r15
51
+
52
+ /* var[] aliases ip[1,2,3] & olimit
53
+ * ip[1,2,3] are saved every iteration.
54
+ * olimit is only used in compute_olimit.
55
+ */
56
+ #define var0 r15
57
+ #define var1 r9
58
+ #define var2 r10
59
+ #define var3 r11
60
+
61
+ /* 32-bit var registers */
62
+ #define vard0 r15d
63
+ #define vard1 r9d
64
+ #define vard2 r10d
65
+ #define vard3 r11d
66
+
67
+ /* Calls X(N) for each stream 0, 1, 2, 3. */
68
+ #define FOR_EACH_STREAM(X) \
69
+ X(0); \
70
+ X(1); \
71
+ X(2); \
72
+ X(3)
73
+
74
+ /* Calls X(N, idx) for each stream 0, 1, 2, 3. */
75
+ #define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
76
+ X(0, idx); \
77
+ X(1, idx); \
78
+ X(2, idx); \
79
+ X(3, idx)
80
+
81
+ /* Define both _HUF_* & HUF_* symbols because MacOS
82
+ * C symbols are prefixed with '_' & Linux symbols aren't.
83
+ */
84
+ _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
85
+ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
86
+ /* Save all registers - even if they are callee saved for simplicity. */
87
+ push %rax
88
+ push %rbx
89
+ push %rcx
90
+ push %rdx
91
+ push %rbp
92
+ push %rsi
93
+ push %rdi
94
+ push %r8
95
+ push %r9
96
+ push %r10
97
+ push %r11
98
+ push %r12
99
+ push %r13
100
+ push %r14
101
+ push %r15
102
+
103
+ /* Copy the HUF_DecompressAsmArgs* argument from %rdi to %rax, then load its fields */
104
+ movq %rdi, %rax
105
+ movq 0(%rax), %ip0
106
+ movq 8(%rax), %ip1
107
+ movq 16(%rax), %ip2
108
+ movq 24(%rax), %ip3
109
+ movq 32(%rax), %op0
110
+ movq 40(%rax), %op1
111
+ movq 48(%rax), %op2
112
+ movq 56(%rax), %op3
113
+ movq 64(%rax), %bits0
114
+ movq 72(%rax), %bits1
115
+ movq 80(%rax), %bits2
116
+ movq 88(%rax), %bits3
117
+ movq 96(%rax), %dtable
118
+ push %rax /* argument */
119
+ push 104(%rax) /* ilimit */
120
+ push 112(%rax) /* oend */
121
+ push %olimit /* olimit space */
122
+
123
+ subq $24, %rsp
124
+
125
+ .L_4X1_compute_olimit:
126
+ /* Computes how many iterations we can do safely
127
+ * %r15, %rax may be clobbered
128
+ * rbx, rdx must be saved
129
+ * op3 & ip0 mustn't be clobbered
130
+ */
131
+ movq %rbx, 0(%rsp)
132
+ movq %rdx, 8(%rsp)
133
+
134
+ movq 32(%rsp), %rax /* rax = oend */
135
+ subq %op3, %rax /* rax = oend - op3 */
136
+
137
+ /* r15 = (oend - op3) / 5 */
138
+ movabsq $-3689348814741910323, %rdx
139
+ mulq %rdx
140
+ movq %rdx, %r15
141
+ shrq $2, %r15
142
+
143
+ movq %ip0, %rax /* rax = ip0 */
144
+ movq 40(%rsp), %rdx /* rdx = ilimit */
145
+ subq %rdx, %rax /* rax = ip0 - ilimit */
146
+ movq %rax, %rbx /* rbx = ip0 - ilimit */
147
+
148
+ /* rdx = (ip0 - ilimit) / 7 */
149
+ movabsq $2635249153387078803, %rdx
150
+ mulq %rdx
151
+ subq %rdx, %rbx
152
+ shrq %rbx
153
+ addq %rbx, %rdx
154
+ shrq $2, %rdx
155
+
156
+ /* r15 = min(%rdx, %r15) */
157
+ cmpq %rdx, %r15
158
+ cmova %rdx, %r15
159
+
160
+ /* r15 = r15 * 5 */
161
+ leaq (%r15, %r15, 4), %r15
162
+
163
+ /* olimit = op3 + r15 */
164
+ addq %op3, %olimit
165
+
166
+ movq 8(%rsp), %rdx
167
+ movq 0(%rsp), %rbx
168
+
169
+ /* If (op3 + 20 > olimit) */
170
+ movq %op3, %rax /* rax = op3 */
171
+ addq $20, %rax /* rax = op3 + 20 */
172
+ cmpq %rax, %olimit /* op3 + 20 > olimit */
173
+ jb .L_4X1_exit
174
+
175
+ /* If (ip1 < ip0) go to exit */
176
+ cmpq %ip0, %ip1
177
+ jb .L_4X1_exit
178
+
179
+ /* If (ip2 < ip1) go to exit */
180
+ cmpq %ip1, %ip2
181
+ jb .L_4X1_exit
182
+
183
+ /* If (ip3 < ip2) go to exit */
184
+ cmpq %ip2, %ip3
185
+ jb .L_4X1_exit
186
+
187
+ /* Reads top 11 bits from bits[n]
188
+ * Loads dt[bits[n]] into var[n]
189
+ */
190
+ #define GET_NEXT_DELT(n) \
191
+ movq $53, %var##n; \
192
+ shrxq %var##n, %bits##n, %var##n; \
193
+ movzwl (%dtable,%var##n,2),%vard##n
194
+
195
+ /* var[n] must contain the DTable entry computed with GET_NEXT_DELT
196
+ * Moves var[n] to %rax
197
+ * bits[n] <<= var[n] & 63
198
+ * op[n][idx] = %rax >> 8
199
+ * %ah is a way to access bits [8, 16) of %rax
200
+ */
201
+ #define DECODE_FROM_DELT(n, idx) \
202
+ movq %var##n, %rax; \
203
+ shlxq %var##n, %bits##n, %bits##n; \
204
+ movb %ah, idx(%op##n)
205
+
206
+ /* Assumes GET_NEXT_DELT has been called.
207
+ * Calls DECODE_FROM_DELT then GET_NEXT_DELT
208
+ */
209
+ #define DECODE_AND_GET_NEXT(n, idx) \
210
+ DECODE_FROM_DELT(n, idx); \
211
+ GET_NEXT_DELT(n) \
212
+
213
+ /* // ctz & nbBytes is stored in bits[n]
214
+ * // nbBits is stored in %rax
215
+ * ctz = CTZ[bits[n]]
216
+ * nbBits = ctz & 7
217
+ * nbBytes = ctz >> 3
218
+ * op[n] += 5
219
+ * ip[n] -= nbBytes
220
+ * // Note: x86-64 is little-endian ==> no bswap
221
+ * bits[n] = MEM_readST(ip[n]) | 1
222
+ * bits[n] <<= nbBits
223
+ */
224
+ #define RELOAD_BITS(n) \
225
+ bsfq %bits##n, %bits##n; \
226
+ movq %bits##n, %rax; \
227
+ andq $7, %rax; \
228
+ shrq $3, %bits##n; \
229
+ leaq 5(%op##n), %op##n; \
230
+ subq %bits##n, %ip##n; \
231
+ movq (%ip##n), %bits##n; \
232
+ orq $1, %bits##n; \
233
+ shlx %rax, %bits##n, %bits##n
234
+
235
+ /* Store clobbered variables on the stack */
236
+ movq %olimit, 24(%rsp)
237
+ movq %ip1, 0(%rsp)
238
+ movq %ip2, 8(%rsp)
239
+ movq %ip3, 16(%rsp)
240
+
241
+ /* Call GET_NEXT_DELT for each stream */
242
+ FOR_EACH_STREAM(GET_NEXT_DELT)
243
+
244
+ .p2align 6
245
+
246
+ .L_4X1_loop_body:
247
+ /* Decode 5 symbols in each of the 4 streams (20 total)
248
+ * Must have called GET_NEXT_DELT for each stream
249
+ */
250
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
251
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
252
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
253
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
254
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
255
+
256
+ /* Load ip[1,2,3] from stack (var[] aliases them)
257
+ * ip[] is needed for RELOAD_BITS
258
+ * Each will be stored back to the stack after RELOAD
259
+ */
260
+ movq 0(%rsp), %ip1
261
+ movq 8(%rsp), %ip2
262
+ movq 16(%rsp), %ip3
263
+
264
+ /* Reload each stream & fetch the next table entry
265
+ * to prepare for the next iteration
266
+ */
267
+ RELOAD_BITS(0)
268
+ GET_NEXT_DELT(0)
269
+
270
+ RELOAD_BITS(1)
271
+ movq %ip1, 0(%rsp)
272
+ GET_NEXT_DELT(1)
273
+
274
+ RELOAD_BITS(2)
275
+ movq %ip2, 8(%rsp)
276
+ GET_NEXT_DELT(2)
277
+
278
+ RELOAD_BITS(3)
279
+ movq %ip3, 16(%rsp)
280
+ GET_NEXT_DELT(3)
281
+
282
+ /* If op3 < olimit: continue the loop */
283
+ cmp %op3, 24(%rsp)
284
+ ja .L_4X1_loop_body
285
+
286
+ /* Reload ip[1,2,3] from stack */
287
+ movq 0(%rsp), %ip1
288
+ movq 8(%rsp), %ip2
289
+ movq 16(%rsp), %ip3
290
+
291
+ /* Re-compute olimit */
292
+ jmp .L_4X1_compute_olimit
293
+
294
+ #undef GET_NEXT_DELT
295
+ #undef DECODE_FROM_DELT
296
+ #undef DECODE
297
+ #undef RELOAD_BITS
298
+ .L_4X1_exit:
299
+ addq $24, %rsp
300
+
301
+ /* Restore stack: pop olimit, oend & ilimit; the last pop leaves the args pointer in %rax */
302
+ pop %rax /* olimit */
303
+ pop %rax /* oend */
304
+ pop %rax /* ilimit */
305
+ pop %rax /* arg */
306
+
307
+ /* Save ip / op / bits */
308
+ movq %ip0, 0(%rax)
309
+ movq %ip1, 8(%rax)
310
+ movq %ip2, 16(%rax)
311
+ movq %ip3, 24(%rax)
312
+ movq %op0, 32(%rax)
313
+ movq %op1, 40(%rax)
314
+ movq %op2, 48(%rax)
315
+ movq %op3, 56(%rax)
316
+ movq %bits0, 64(%rax)
317
+ movq %bits1, 72(%rax)
318
+ movq %bits2, 80(%rax)
319
+ movq %bits3, 88(%rax)
320
+
321
+ /* Restore registers */
322
+ pop %r15
323
+ pop %r14
324
+ pop %r13
325
+ pop %r12
326
+ pop %r11
327
+ pop %r10
328
+ pop %r9
329
+ pop %r8
330
+ pop %rdi
331
+ pop %rsi
332
+ pop %rbp
333
+ pop %rdx
334
+ pop %rcx
335
+ pop %rbx
336
+ pop %rax
337
+ ret
338
+
339
+ _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
340
+ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
341
+ /* Save all registers - even if they are callee saved for simplicity. */
342
+ push %rax
343
+ push %rbx
344
+ push %rcx
345
+ push %rdx
346
+ push %rbp
347
+ push %rsi
348
+ push %rdi
349
+ push %r8
350
+ push %r9
351
+ push %r10
352
+ push %r11
353
+ push %r12
354
+ push %r13
355
+ push %r14
356
+ push %r15
357
+
358
+ movq %rdi, %rax
359
+ movq 0(%rax), %ip0
360
+ movq 8(%rax), %ip1
361
+ movq 16(%rax), %ip2
362
+ movq 24(%rax), %ip3
363
+ movq 32(%rax), %op0
364
+ movq 40(%rax), %op1
365
+ movq 48(%rax), %op2
366
+ movq 56(%rax), %op3
367
+ movq 64(%rax), %bits0
368
+ movq 72(%rax), %bits1
369
+ movq 80(%rax), %bits2
370
+ movq 88(%rax), %bits3
371
+ movq 96(%rax), %dtable
372
+ push %rax /* argument */
373
+ push %rax /* olimit */
374
+ push 104(%rax) /* ilimit */
375
+
376
+ movq 112(%rax), %rax
377
+ push %rax /* oend3 */
378
+
379
+ movq %op3, %rax
380
+ push %rax /* oend2 */
381
+
382
+ movq %op2, %rax
383
+ push %rax /* oend1 */
384
+
385
+ movq %op1, %rax
386
+ push %rax /* oend0 */
387
+
388
+ /* Scratch space */
389
+ subq $8, %rsp
390
+
391
+ .L_4X2_compute_olimit:
392
+ /* Computes how many iterations we can do safely
393
+ * %r15, %rax may be clobbered
394
+ * rdx must be saved
395
+ * op[0,1,2,3] & ip0 mustn't be clobbered
396
+ */
397
+ movq %rdx, 0(%rsp)
398
+
399
+ /* We can consume up to 7 input bytes each iteration. */
400
+ movq %ip0, %rax /* rax = ip0 */
401
+ movq 40(%rsp), %rdx /* rdx = ilimit */
402
+ subq %rdx, %rax /* rax = ip0 - ilimit */
403
+ movq %rax, %r15 /* r15 = ip0 - ilimit */
404
+
405
+ /* rdx = rax / 7 */
406
+ movabsq $2635249153387078803, %rdx
407
+ mulq %rdx
408
+ subq %rdx, %r15
409
+ shrq %r15
410
+ addq %r15, %rdx
411
+ shrq $2, %rdx
412
+
413
+ /* r15 = (ip0 - ilimit) / 7 */
414
+ movq %rdx, %r15
415
+
416
+ movabsq $-3689348814741910323, %rdx
417
+ movq 8(%rsp), %rax /* rax = oend0 */
418
+ subq %op0, %rax /* rax = oend0 - op0 */
419
+ mulq %rdx
420
+ shrq $3, %rdx /* rdx = rax / 10 */
421
+
422
+ /* r15 = min(%rdx, %r15) */
423
+ cmpq %rdx, %r15
424
+ cmova %rdx, %r15
425
+
426
+ movabsq $-3689348814741910323, %rdx
427
+ movq 16(%rsp), %rax /* rax = oend1 */
428
+ subq %op1, %rax /* rax = oend1 - op1 */
429
+ mulq %rdx
430
+ shrq $3, %rdx /* rdx = rax / 10 */
431
+
432
+ /* r15 = min(%rdx, %r15) */
433
+ cmpq %rdx, %r15
434
+ cmova %rdx, %r15
435
+
436
+ movabsq $-3689348814741910323, %rdx
437
+ movq 24(%rsp), %rax /* rax = oend2 */
438
+ subq %op2, %rax /* rax = oend2 - op2 */
439
+ mulq %rdx
440
+ shrq $3, %rdx /* rdx = rax / 10 */
441
+
442
+ /* r15 = min(%rdx, %r15) */
443
+ cmpq %rdx, %r15
444
+ cmova %rdx, %r15
445
+
446
+ movabsq $-3689348814741910323, %rdx
447
+ movq 32(%rsp), %rax /* rax = oend3 */
448
+ subq %op3, %rax /* rax = oend3 - op3 */
449
+ mulq %rdx
450
+ shrq $3, %rdx /* rdx = rax / 10 */
451
+
452
+ /* r15 = min(%rdx, %r15) */
453
+ cmpq %rdx, %r15
454
+ cmova %rdx, %r15
455
+
456
+ /* olimit = op3 + 5 * r15 */
457
+ movq %r15, %rax
458
+ leaq (%op3, %rax, 4), %olimit
459
+ addq %rax, %olimit
460
+
461
+ movq 0(%rsp), %rdx
462
+
463
+ /* If (op3 + 10 > olimit) */
464
+ movq %op3, %rax /* rax = op3 */
465
+ addq $10, %rax /* rax = op3 + 10 */
466
+ cmpq %rax, %olimit /* op3 + 10 > olimit */
467
+ jb .L_4X2_exit
468
+
469
+ /* If (ip1 < ip0) go to exit */
470
+ cmpq %ip0, %ip1
471
+ jb .L_4X2_exit
472
+
473
+ /* If (ip2 < ip1) go to exit */
474
+ cmpq %ip1, %ip2
475
+ jb .L_4X2_exit
476
+
477
+ /* If (ip3 < ip2) go to exit */
478
+ cmpq %ip2, %ip3
479
+ jb .L_4X2_exit
480
+
481
+ #define DECODE(n, idx) \
482
+ movq %bits##n, %rax; \
483
+ shrq $53, %rax; \
484
+ movzwl 0(%dtable,%rax,4),%r8d; \
485
+ movzbl 2(%dtable,%rax,4),%r15d; \
486
+ movzbl 3(%dtable,%rax,4),%eax; \
487
+ movw %r8w, (%op##n); \
488
+ shlxq %r15, %bits##n, %bits##n; \
489
+ addq %rax, %op##n
490
+
491
+ #define RELOAD_BITS(n) \
492
+ bsfq %bits##n, %bits##n; \
493
+ movq %bits##n, %rax; \
494
+ shrq $3, %bits##n; \
495
+ andq $7, %rax; \
496
+ subq %bits##n, %ip##n; \
497
+ movq (%ip##n), %bits##n; \
498
+ orq $1, %bits##n; \
499
+ shlxq %rax, %bits##n, %bits##n
500
+
501
+
502
+ movq %olimit, 48(%rsp)
503
+
504
+ .p2align 6
505
+
506
+ .L_4X2_loop_body:
507
+ /* DECODE clobbers r8 (which holds ip0), so store it on the stack */
508
+ movq %r8, 0(%rsp)
509
+
510
+ /* Decode 5 symbols from each of the 4 streams (20 symbols total). */
511
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
512
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
513
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
514
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
515
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
516
+
517
+ /* Reload r8 */
518
+ movq 0(%rsp), %r8
519
+
520
+ FOR_EACH_STREAM(RELOAD_BITS)
521
+
522
+ cmp %op3, 48(%rsp)
523
+ ja .L_4X2_loop_body
524
+ jmp .L_4X2_compute_olimit
525
+
526
+ #undef DECODE
527
+ #undef RELOAD_BITS
528
+ .L_4X2_exit:
529
+ addq $8, %rsp
530
+ /* Restore stack: pop oend0-3, ilimit & olimit; the last pop leaves the args pointer in %rax */
531
+ pop %rax /* oend0 */
532
+ pop %rax /* oend1 */
533
+ pop %rax /* oend2 */
534
+ pop %rax /* oend3 */
535
+ pop %rax /* ilimit */
536
+ pop %rax /* olimit */
537
+ pop %rax /* arg */
538
+
539
+ /* Save ip / op / bits */
540
+ movq %ip0, 0(%rax)
541
+ movq %ip1, 8(%rax)
542
+ movq %ip2, 16(%rax)
543
+ movq %ip3, 24(%rax)
544
+ movq %op0, 32(%rax)
545
+ movq %op1, 40(%rax)
546
+ movq %op2, 48(%rax)
547
+ movq %op3, 56(%rax)
548
+ movq %bits0, 64(%rax)
549
+ movq %bits1, 72(%rax)
550
+ movq %bits2, 80(%rax)
551
+ movq %bits3, 88(%rax)
552
+
553
+ /* Restore registers */
554
+ pop %r15
555
+ pop %r14
556
+ pop %r13
557
+ pop %r12
558
+ pop %r11
559
+ pop %r10
560
+ pop %r9
561
+ pop %r8
562
+ pop %rdi
563
+ pop %rsi
564
+ pop %rbp
565
+ pop %rdx
566
+ pop %rcx
567
+ pop %rbx
568
+ pop %rax
569
+ ret
570
+
571
+ #endif
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,18 +14,18 @@
14
14
  /*-*******************************************************
15
15
  * Dependencies
16
16
  *********************************************************/
17
- #include <string.h> /* memcpy, memmove, memset */
18
- #include "cpu.h" /* bmi2 */
19
- #include "mem.h" /* low level memory routines */
17
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
+ #include "../common/cpu.h" /* bmi2 */
19
+ #include "../common/mem.h" /* low level memory routines */
20
20
  #define FSE_STATIC_LINKING_ONLY
21
- #include "fse.h"
21
+ #include "../common/fse.h"
22
22
  #define HUF_STATIC_LINKING_ONLY
23
- #include "huf.h"
23
+ #include "../common/huf.h"
24
24
  #include "zstd_decompress_internal.h"
25
25
  #include "zstd_ddict.h"
26
26
 
27
27
  #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28
- # include "zstd_legacy.h"
28
+ # include "../legacy/zstd_legacy.h"
29
29
  #endif
30
30
 
31
31
 
@@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
65
65
  dctx->virtualStart = ddict->dictContent;
66
66
  dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
67
67
  dctx->previousDstEnd = dctx->dictEnd;
68
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
69
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
70
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
71
+ #endif
68
72
  if (ddict->entropyPresent) {
69
73
  dctx->litEntropy = 1;
70
74
  dctx->fseEntropy = 1;
@@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
107
111
  /* load entropy tables */
108
112
  RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
109
113
  &ddict->entropy, ddict->dictContent, ddict->dictSize)),
110
- dictionary_corrupted);
114
+ dictionary_corrupted, "");
111
115
  ddict->entropyPresent = 1;
112
116
  return 0;
113
117
  }
@@ -123,17 +127,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
123
127
  ddict->dictContent = dict;
124
128
  if (!dict) dictSize = 0;
125
129
  } else {
126
- void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
130
+ void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
127
131
  ddict->dictBuffer = internalBuffer;
128
132
  ddict->dictContent = internalBuffer;
129
133
  if (!internalBuffer) return ERROR(memory_allocation);
130
- memcpy(internalBuffer, dict, dictSize);
134
+ ZSTD_memcpy(internalBuffer, dict, dictSize);
131
135
  }
132
136
  ddict->dictSize = dictSize;
133
137
  ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
134
138
 
135
139
  /* parse dictionary content */
136
- FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
140
+ FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
137
141
 
138
142
  return 0;
139
143
  }
@@ -143,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
143
147
  ZSTD_dictContentType_e dictContentType,
144
148
  ZSTD_customMem customMem)
145
149
  {
146
- if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
150
+ if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
147
151
 
148
- { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
152
+ { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
149
153
  if (ddict == NULL) return NULL;
150
154
  ddict->cMem = customMem;
151
155
  { size_t const initResult = ZSTD_initDDict_internal(ddict,
@@ -194,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
194
198
  if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
195
199
  if (sBufferSize < neededSpace) return NULL;
196
200
  if (dictLoadMethod == ZSTD_dlm_byCopy) {
197
- memcpy(ddict+1, dict, dictSize); /* local copy */
201
+ ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
198
202
  dict = ddict+1;
199
203
  }
200
204
  if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
@@ -209,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
209
213
  {
210
214
  if (ddict==NULL) return 0; /* support free on NULL */
211
215
  { ZSTD_customMem const cMem = ddict->cMem;
212
- ZSTD_free(ddict->dictBuffer, cMem);
213
- ZSTD_free(ddict, cMem);
216
+ ZSTD_customFree(ddict->dictBuffer, cMem);
217
+ ZSTD_customFree(ddict, cMem);
214
218
  return 0;
215
219
  }
216
220
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,8 +15,8 @@
15
15
  /*-*******************************************************
16
16
  * Dependencies
17
17
  *********************************************************/
18
- #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* ZSTD_DDict, and several public functions */
18
+ #include "../common/zstd_deps.h" /* size_t */
19
+ #include "../zstd.h" /* ZSTD_DDict, and several public functions */
20
20
 
21
21
 
22
22
  /*-*******************************************************