zstd-ruby 1.4.4.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -0,0 +1,576 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #include "../common/portability_macros.h"
12
+
13
+ /* Stack marking
14
+ * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
15
+ */
16
+ #if defined(__ELF__) && defined(__GNUC__)
17
+ .section .note.GNU-stack,"",%progbits
18
+ #endif
19
+
20
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
21
+
22
+ /* Calling convention:
23
+ *
24
+ * %rdi contains the first argument: HUF_DecompressAsmArgs*.
25
+ * %rbp isn't maintained (no frame pointer).
26
+ * %rsp contains the stack pointer that grows down.
27
+ * No red-zone is assumed, only addresses >= %rsp are used.
28
+ * All register contents are preserved.
29
+ *
30
+ * TODO: Support Windows calling convention.
31
+ */
32
+
33
+ ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
34
+ ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
35
+ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
36
+ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
37
+ .global HUF_decompress4X1_usingDTable_internal_fast_asm_loop
38
+ .global HUF_decompress4X2_usingDTable_internal_fast_asm_loop
39
+ .global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop
40
+ .global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop
41
+ .text
42
+
43
+ /* Sets up register mappings for clarity.
44
+ * op[], bits[], dtable & ip[0] each get their own register.
45
+ * ip[1,2,3] & olimit alias var[].
46
+ * %rax is a scratch register.
47
+ */
48
+
49
+ #define op0 rsi
50
+ #define op1 rbx
51
+ #define op2 rcx
52
+ #define op3 rdi
53
+
54
+ #define ip0 r8
55
+ #define ip1 r9
56
+ #define ip2 r10
57
+ #define ip3 r11
58
+
59
+ #define bits0 rbp
60
+ #define bits1 rdx
61
+ #define bits2 r12
62
+ #define bits3 r13
63
+ #define dtable r14
64
+ #define olimit r15
65
+
66
+ /* var[] aliases ip[1,2,3] & olimit
67
+ * ip[1,2,3] are saved every iteration.
68
+ * olimit is only used in compute_olimit.
69
+ */
70
+ #define var0 r15
71
+ #define var1 r9
72
+ #define var2 r10
73
+ #define var3 r11
74
+
75
+ /* 32-bit var registers */
76
+ #define vard0 r15d
77
+ #define vard1 r9d
78
+ #define vard2 r10d
79
+ #define vard3 r11d
80
+
81
+ /* Calls X(N) for each stream 0, 1, 2, 3. */
82
+ #define FOR_EACH_STREAM(X) \
83
+ X(0); \
84
+ X(1); \
85
+ X(2); \
86
+ X(3)
87
+
88
+ /* Calls X(N, idx) for each stream 0, 1, 2, 3. */
89
+ #define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
90
+ X(0, idx); \
91
+ X(1, idx); \
92
+ X(2, idx); \
93
+ X(3, idx)
94
+
95
+ /* Define both _HUF_* & HUF_* symbols because MacOS
96
+ * C symbols are prefixed with '_' & Linux symbols aren't.
97
+ */
98
+ _HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
99
+ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
100
+ ZSTD_CET_ENDBRANCH
101
+ /* Save all registers - even if they are callee saved for simplicity. */
102
+ push %rax
103
+ push %rbx
104
+ push %rcx
105
+ push %rdx
106
+ push %rbp
107
+ push %rsi
108
+ push %rdi
109
+ push %r8
110
+ push %r9
111
+ push %r10
112
+ push %r11
113
+ push %r12
114
+ push %r13
115
+ push %r14
116
+ push %r15
117
+
118
+ /* Read HUF_DecompressAsmArgs* args from %rax */
119
+ movq %rdi, %rax
120
+ movq 0(%rax), %ip0
121
+ movq 8(%rax), %ip1
122
+ movq 16(%rax), %ip2
123
+ movq 24(%rax), %ip3
124
+ movq 32(%rax), %op0
125
+ movq 40(%rax), %op1
126
+ movq 48(%rax), %op2
127
+ movq 56(%rax), %op3
128
+ movq 64(%rax), %bits0
129
+ movq 72(%rax), %bits1
130
+ movq 80(%rax), %bits2
131
+ movq 88(%rax), %bits3
132
+ movq 96(%rax), %dtable
133
+ push %rax /* argument */
134
+ push 104(%rax) /* ilimit */
135
+ push 112(%rax) /* oend */
136
+ push %olimit /* olimit space */
137
+
138
+ subq $24, %rsp
139
+
140
+ .L_4X1_compute_olimit:
141
+ /* Computes how many iterations we can do safely
142
+ * %r15, %rax may be clobbered
143
+ * rbx, rdx must be saved
144
+ * op3 & ip0 mustn't be clobbered
145
+ */
146
+ movq %rbx, 0(%rsp)
147
+ movq %rdx, 8(%rsp)
148
+
149
+ movq 32(%rsp), %rax /* rax = oend */
150
+ subq %op3, %rax /* rax = oend - op3 */
151
+
152
+ /* r15 = (oend - op3) / 5 */
153
+ movabsq $-3689348814741910323, %rdx
154
+ mulq %rdx
155
+ movq %rdx, %r15
156
+ shrq $2, %r15
157
+
158
+ movq %ip0, %rax /* rax = ip0 */
159
+ movq 40(%rsp), %rdx /* rdx = ilimit */
160
+ subq %rdx, %rax /* rax = ip0 - ilimit */
161
+ movq %rax, %rbx /* rbx = ip0 - ilimit */
162
+
163
+ /* rdx = (ip0 - ilimit) / 7 */
164
+ movabsq $2635249153387078803, %rdx
165
+ mulq %rdx
166
+ subq %rdx, %rbx
167
+ shrq %rbx
168
+ addq %rbx, %rdx
169
+ shrq $2, %rdx
170
+
171
+ /* r15 = min(%rdx, %r15) */
172
+ cmpq %rdx, %r15
173
+ cmova %rdx, %r15
174
+
175
+ /* r15 = r15 * 5 */
176
+ leaq (%r15, %r15, 4), %r15
177
+
178
+ /* olimit = op3 + r15 */
179
+ addq %op3, %olimit
180
+
181
+ movq 8(%rsp), %rdx
182
+ movq 0(%rsp), %rbx
183
+
184
+ /* If (op3 + 20 > olimit) */
185
+ movq %op3, %rax /* rax = op3 */
186
+ addq $20, %rax /* rax = op3 + 20 */
187
+ cmpq %rax, %olimit /* op3 + 20 > olimit */
188
+ jb .L_4X1_exit
189
+
190
+ /* If (ip1 < ip0) go to exit */
191
+ cmpq %ip0, %ip1
192
+ jb .L_4X1_exit
193
+
194
+ /* If (ip2 < ip1) go to exit */
195
+ cmpq %ip1, %ip2
196
+ jb .L_4X1_exit
197
+
198
+ /* If (ip3 < ip2) go to exit */
199
+ cmpq %ip2, %ip3
200
+ jb .L_4X1_exit
201
+
202
+ /* Reads top 11 bits from bits[n]
203
+ * Loads dt[bits[n]] into var[n]
204
+ */
205
+ #define GET_NEXT_DELT(n) \
206
+ movq $53, %var##n; \
207
+ shrxq %var##n, %bits##n, %var##n; \
208
+ movzwl (%dtable,%var##n,2),%vard##n
209
+
210
+ /* var[n] must contain the DTable entry computed with GET_NEXT_DELT
211
+ * Moves var[n] to %rax
212
+ * bits[n] <<= var[n] & 63
213
+ * op[n][idx] = %rax >> 8
214
+ * %ah is a way to access bits [8, 16) of %rax
215
+ */
216
+ #define DECODE_FROM_DELT(n, idx) \
217
+ movq %var##n, %rax; \
218
+ shlxq %var##n, %bits##n, %bits##n; \
219
+ movb %ah, idx(%op##n)
220
+
221
+ /* Assumes GET_NEXT_DELT has been called.
222
+ * Calls DECODE_FROM_DELT then GET_NEXT_DELT
223
+ */
224
+ #define DECODE_AND_GET_NEXT(n, idx) \
225
+ DECODE_FROM_DELT(n, idx); \
226
+ GET_NEXT_DELT(n) \
227
+
228
+ /* // ctz & nbBytes is stored in bits[n]
229
+ * // nbBits is stored in %rax
230
+ * ctz = CTZ[bits[n]]
231
+ * nbBits = ctz & 7
232
+ * nbBytes = ctz >> 3
233
+ * op[n] += 5
234
+ * ip[n] -= nbBytes
235
+ * // Note: x86-64 is little-endian ==> no bswap
236
+ * bits[n] = MEM_readST(ip[n]) | 1
237
+ * bits[n] <<= nbBits
238
+ */
239
+ #define RELOAD_BITS(n) \
240
+ bsfq %bits##n, %bits##n; \
241
+ movq %bits##n, %rax; \
242
+ andq $7, %rax; \
243
+ shrq $3, %bits##n; \
244
+ leaq 5(%op##n), %op##n; \
245
+ subq %bits##n, %ip##n; \
246
+ movq (%ip##n), %bits##n; \
247
+ orq $1, %bits##n; \
248
+ shlx %rax, %bits##n, %bits##n
249
+
250
+ /* Store clobbered variables on the stack */
251
+ movq %olimit, 24(%rsp)
252
+ movq %ip1, 0(%rsp)
253
+ movq %ip2, 8(%rsp)
254
+ movq %ip3, 16(%rsp)
255
+
256
+ /* Call GET_NEXT_DELT for each stream */
257
+ FOR_EACH_STREAM(GET_NEXT_DELT)
258
+
259
+ .p2align 6
260
+
261
+ .L_4X1_loop_body:
262
+ /* Decode 5 symbols in each of the 4 streams (20 total)
263
+ * Must have called GET_NEXT_DELT for each stream
264
+ */
265
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
266
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
267
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
268
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
269
+ FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
270
+
271
+ /* Load ip[1,2,3] from stack (var[] aliases them)
272
+ * ip[] is needed for RELOAD_BITS
273
+ * Each will be stored back to the stack after RELOAD
274
+ */
275
+ movq 0(%rsp), %ip1
276
+ movq 8(%rsp), %ip2
277
+ movq 16(%rsp), %ip3
278
+
279
+ /* Reload each stream & fetch the next table entry
280
+ * to prepare for the next iteration
281
+ */
282
+ RELOAD_BITS(0)
283
+ GET_NEXT_DELT(0)
284
+
285
+ RELOAD_BITS(1)
286
+ movq %ip1, 0(%rsp)
287
+ GET_NEXT_DELT(1)
288
+
289
+ RELOAD_BITS(2)
290
+ movq %ip2, 8(%rsp)
291
+ GET_NEXT_DELT(2)
292
+
293
+ RELOAD_BITS(3)
294
+ movq %ip3, 16(%rsp)
295
+ GET_NEXT_DELT(3)
296
+
297
+ /* If op3 < olimit: continue the loop */
298
+ cmp %op3, 24(%rsp)
299
+ ja .L_4X1_loop_body
300
+
301
+ /* Reload ip[1,2,3] from stack */
302
+ movq 0(%rsp), %ip1
303
+ movq 8(%rsp), %ip2
304
+ movq 16(%rsp), %ip3
305
+
306
+ /* Re-compute olimit */
307
+ jmp .L_4X1_compute_olimit
308
+
309
+ #undef GET_NEXT_DELT
310
+ #undef DECODE_FROM_DELT
311
+ #undef DECODE
312
+ #undef RELOAD_BITS
313
+ .L_4X1_exit:
314
+ addq $24, %rsp
315
+
316
+ /* Restore stack: discard olimit, oend & ilimit, leaving the args pointer in %rax */
317
+ pop %rax /* olimit */
318
+ pop %rax /* oend */
319
+ pop %rax /* ilimit */
320
+ pop %rax /* arg */
321
+
322
+ /* Save ip / op / bits */
323
+ movq %ip0, 0(%rax)
324
+ movq %ip1, 8(%rax)
325
+ movq %ip2, 16(%rax)
326
+ movq %ip3, 24(%rax)
327
+ movq %op0, 32(%rax)
328
+ movq %op1, 40(%rax)
329
+ movq %op2, 48(%rax)
330
+ movq %op3, 56(%rax)
331
+ movq %bits0, 64(%rax)
332
+ movq %bits1, 72(%rax)
333
+ movq %bits2, 80(%rax)
334
+ movq %bits3, 88(%rax)
335
+
336
+ /* Restore registers */
337
+ pop %r15
338
+ pop %r14
339
+ pop %r13
340
+ pop %r12
341
+ pop %r11
342
+ pop %r10
343
+ pop %r9
344
+ pop %r8
345
+ pop %rdi
346
+ pop %rsi
347
+ pop %rbp
348
+ pop %rdx
349
+ pop %rcx
350
+ pop %rbx
351
+ pop %rax
352
+ ret
353
+
354
+ _HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
355
+ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
356
+ ZSTD_CET_ENDBRANCH
357
+ /* Save all registers - even if they are callee saved for simplicity. */
358
+ push %rax
359
+ push %rbx
360
+ push %rcx
361
+ push %rdx
362
+ push %rbp
363
+ push %rsi
364
+ push %rdi
365
+ push %r8
366
+ push %r9
367
+ push %r10
368
+ push %r11
369
+ push %r12
370
+ push %r13
371
+ push %r14
372
+ push %r15
373
+
374
+ movq %rdi, %rax
375
+ movq 0(%rax), %ip0
376
+ movq 8(%rax), %ip1
377
+ movq 16(%rax), %ip2
378
+ movq 24(%rax), %ip3
379
+ movq 32(%rax), %op0
380
+ movq 40(%rax), %op1
381
+ movq 48(%rax), %op2
382
+ movq 56(%rax), %op3
383
+ movq 64(%rax), %bits0
384
+ movq 72(%rax), %bits1
385
+ movq 80(%rax), %bits2
386
+ movq 88(%rax), %bits3
387
+ movq 96(%rax), %dtable
388
+ push %rax /* argument */
389
+ push %rax /* olimit */
390
+ push 104(%rax) /* ilimit */
391
+
392
+ movq 112(%rax), %rax
393
+ push %rax /* oend3 */
394
+
395
+ movq %op3, %rax
396
+ push %rax /* oend2 */
397
+
398
+ movq %op2, %rax
399
+ push %rax /* oend1 */
400
+
401
+ movq %op1, %rax
402
+ push %rax /* oend0 */
403
+
404
+ /* Scratch space */
405
+ subq $8, %rsp
406
+
407
+ .L_4X2_compute_olimit:
408
+ /* Computes how many iterations we can do safely
409
+ * %r15, %rax may be clobbered
410
+ * rdx must be saved
411
+ * op[0,1,2,3] & ip0 mustn't be clobbered
412
+ */
413
+ movq %rdx, 0(%rsp)
414
+
415
+ /* We can consume up to 7 input bytes each iteration. */
416
+ movq %ip0, %rax /* rax = ip0 */
417
+ movq 40(%rsp), %rdx /* rdx = ilimit */
418
+ subq %rdx, %rax /* rax = ip0 - ilimit */
419
+ movq %rax, %r15 /* r15 = ip0 - ilimit */
420
+
421
+ /* rdx = rax / 7 */
422
+ movabsq $2635249153387078803, %rdx
423
+ mulq %rdx
424
+ subq %rdx, %r15
425
+ shrq %r15
426
+ addq %r15, %rdx
427
+ shrq $2, %rdx
428
+
429
+ /* r15 = (ip0 - ilimit) / 7 */
430
+ movq %rdx, %r15
431
+
432
+ /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
433
+ movq 8(%rsp), %rax /* rax = oend0 */
434
+ subq %op0, %rax /* rax = oend0 - op0 */
435
+ movq 16(%rsp), %rdx /* rdx = oend1 */
436
+ subq %op1, %rdx /* rdx = oend1 - op1 */
437
+
438
+ cmpq %rax, %rdx
439
+ cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
440
+
441
+ movq 24(%rsp), %rax /* rax = oend2 */
442
+ subq %op2, %rax /* rax = oend2 - op2 */
443
+
444
+ cmpq %rax, %rdx
445
+ cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
446
+
447
+ movq 32(%rsp), %rax /* rax = oend3 */
448
+ subq %op3, %rax /* rax = oend3 - op3 */
449
+
450
+ cmpq %rax, %rdx
451
+ cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
452
+
453
+ movabsq $-3689348814741910323, %rax
454
+ mulq %rdx
455
+ shrq $3, %rdx /* rdx = rdx / 10 */
456
+
457
+ /* r15 = min(%rdx, %r15) */
458
+ cmpq %rdx, %r15
459
+ cmova %rdx, %r15
460
+
461
+ /* olimit = op3 + 5 * r15 */
462
+ movq %r15, %rax
463
+ leaq (%op3, %rax, 4), %olimit
464
+ addq %rax, %olimit
465
+
466
+ movq 0(%rsp), %rdx
467
+
468
+ /* If (op3 + 10 > olimit) */
469
+ movq %op3, %rax /* rax = op3 */
470
+ addq $10, %rax /* rax = op3 + 10 */
471
+ cmpq %rax, %olimit /* op3 + 10 > olimit */
472
+ jb .L_4X2_exit
473
+
474
+ /* If (ip1 < ip0) go to exit */
475
+ cmpq %ip0, %ip1
476
+ jb .L_4X2_exit
477
+
478
+ /* If (ip2 < ip1) go to exit */
479
+ cmpq %ip1, %ip2
480
+ jb .L_4X2_exit
481
+
482
+ /* If (ip3 < ip2) go to exit */
483
+ cmpq %ip2, %ip3
484
+ jb .L_4X2_exit
485
+
486
+ #define DECODE(n, idx) \
487
+ movq %bits##n, %rax; \
488
+ shrq $53, %rax; \
489
+ movzwl 0(%dtable,%rax,4),%r8d; \
490
+ movzbl 2(%dtable,%rax,4),%r15d; \
491
+ movzbl 3(%dtable,%rax,4),%eax; \
492
+ movw %r8w, (%op##n); \
493
+ shlxq %r15, %bits##n, %bits##n; \
494
+ addq %rax, %op##n
495
+
496
+ #define RELOAD_BITS(n) \
497
+ bsfq %bits##n, %bits##n; \
498
+ movq %bits##n, %rax; \
499
+ shrq $3, %bits##n; \
500
+ andq $7, %rax; \
501
+ subq %bits##n, %ip##n; \
502
+ movq (%ip##n), %bits##n; \
503
+ orq $1, %bits##n; \
504
+ shlxq %rax, %bits##n, %bits##n
505
+
506
+
507
+ movq %olimit, 48(%rsp)
508
+
509
+ .p2align 6
510
+
511
+ .L_4X2_loop_body:
512
+ /* We clobber r8, so store it on the stack */
513
+ movq %r8, 0(%rsp)
514
+
515
+ /* Decode 5 symbols from each of the 4 streams (20 symbols total). */
516
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
517
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
518
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
519
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
520
+ FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
521
+
522
+ /* Reload r8 */
523
+ movq 0(%rsp), %r8
524
+
525
+ FOR_EACH_STREAM(RELOAD_BITS)
526
+
527
+ cmp %op3, 48(%rsp)
528
+ ja .L_4X2_loop_body
529
+ jmp .L_4X2_compute_olimit
530
+
531
+ #undef DECODE
532
+ #undef RELOAD_BITS
533
+ .L_4X2_exit:
534
+ addq $8, %rsp
535
+ /* Restore stack: discard oend[0-3], ilimit & olimit, leaving the args pointer in %rax */
536
+ pop %rax /* oend0 */
537
+ pop %rax /* oend1 */
538
+ pop %rax /* oend2 */
539
+ pop %rax /* oend3 */
540
+ pop %rax /* ilimit */
541
+ pop %rax /* olimit */
542
+ pop %rax /* arg */
543
+
544
+ /* Save ip / op / bits */
545
+ movq %ip0, 0(%rax)
546
+ movq %ip1, 8(%rax)
547
+ movq %ip2, 16(%rax)
548
+ movq %ip3, 24(%rax)
549
+ movq %op0, 32(%rax)
550
+ movq %op1, 40(%rax)
551
+ movq %op2, 48(%rax)
552
+ movq %op3, 56(%rax)
553
+ movq %bits0, 64(%rax)
554
+ movq %bits1, 72(%rax)
555
+ movq %bits2, 80(%rax)
556
+ movq %bits3, 88(%rax)
557
+
558
+ /* Restore registers */
559
+ pop %r15
560
+ pop %r14
561
+ pop %r13
562
+ pop %r12
563
+ pop %r11
564
+ pop %r10
565
+ pop %r9
566
+ pop %r8
567
+ pop %rdi
568
+ pop %rsi
569
+ pop %rbp
570
+ pop %rdx
571
+ pop %rcx
572
+ pop %rbx
573
+ pop %rax
574
+ ret
575
+
576
+ #endif
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,18 +14,18 @@
14
14
  /*-*******************************************************
15
15
  * Dependencies
16
16
  *********************************************************/
17
- #include <string.h> /* memcpy, memmove, memset */
18
- #include "cpu.h" /* bmi2 */
19
- #include "mem.h" /* low level memory routines */
17
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
18
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
20
21
  #define FSE_STATIC_LINKING_ONLY
21
- #include "fse.h"
22
- #define HUF_STATIC_LINKING_ONLY
23
- #include "huf.h"
22
+ #include "../common/fse.h"
23
+ #include "../common/huf.h"
24
24
  #include "zstd_decompress_internal.h"
25
25
  #include "zstd_ddict.h"
26
26
 
27
27
  #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28
- # include "zstd_legacy.h"
28
+ # include "../legacy/zstd_legacy.h"
29
29
  #endif
30
30
 
31
31
 
@@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
65
65
  dctx->virtualStart = ddict->dictContent;
66
66
  dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
67
67
  dctx->previousDstEnd = dctx->dictEnd;
68
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
69
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
70
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
71
+ #endif
68
72
  if (ddict->entropyPresent) {
69
73
  dctx->litEntropy = 1;
70
74
  dctx->fseEntropy = 1;
@@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
107
111
  /* load entropy tables */
108
112
  RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
109
113
  &ddict->entropy, ddict->dictContent, ddict->dictSize)),
110
- dictionary_corrupted);
114
+ dictionary_corrupted, "");
111
115
  ddict->entropyPresent = 1;
112
116
  return 0;
113
117
  }
@@ -123,17 +127,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
123
127
  ddict->dictContent = dict;
124
128
  if (!dict) dictSize = 0;
125
129
  } else {
126
- void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
130
+ void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
127
131
  ddict->dictBuffer = internalBuffer;
128
132
  ddict->dictContent = internalBuffer;
129
133
  if (!internalBuffer) return ERROR(memory_allocation);
130
- memcpy(internalBuffer, dict, dictSize);
134
+ ZSTD_memcpy(internalBuffer, dict, dictSize);
131
135
  }
132
136
  ddict->dictSize = dictSize;
133
- ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
137
+ ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
134
138
 
135
139
  /* parse dictionary content */
136
- FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
140
+ FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
137
141
 
138
142
  return 0;
139
143
  }
@@ -143,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
143
147
  ZSTD_dictContentType_e dictContentType,
144
148
  ZSTD_customMem customMem)
145
149
  {
146
- if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
150
+ if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
147
151
 
148
- { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
152
+ { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
149
153
  if (ddict == NULL) return NULL;
150
154
  ddict->cMem = customMem;
151
155
  { size_t const initResult = ZSTD_initDDict_internal(ddict,
@@ -194,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
194
198
  if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
195
199
  if (sBufferSize < neededSpace) return NULL;
196
200
  if (dictLoadMethod == ZSTD_dlm_byCopy) {
197
- memcpy(ddict+1, dict, dictSize); /* local copy */
201
+ ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
198
202
  dict = ddict+1;
199
203
  }
200
204
  if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
@@ -209,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
209
213
  {
210
214
  if (ddict==NULL) return 0; /* support free on NULL */
211
215
  { ZSTD_customMem const cMem = ddict->cMem;
212
- ZSTD_free(ddict->dictBuffer, cMem);
213
- ZSTD_free(ddict, cMem);
216
+ ZSTD_customFree(ddict->dictBuffer, cMem);
217
+ ZSTD_customFree(ddict, cMem);
214
218
  return 0;
215
219
  }
216
220
  }
@@ -236,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
236
240
  unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
237
241
  {
238
242
  if (ddict==NULL) return 0;
239
- return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
243
+ return ddict->dictID;
240
244
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,8 +15,8 @@
15
15
  /*-*******************************************************
16
16
  * Dependencies
17
17
  *********************************************************/
18
- #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* ZSTD_DDict, and several public functions */
18
+ #include "../common/zstd_deps.h" /* size_t */
19
+ #include "../zstd.h" /* ZSTD_DDict, and several public functions */
20
20
 
21
21
 
22
22
  /*-*******************************************************