zstdlib 0.10.0-x64-mingw32 → 0.12.0-x64-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +17 -0
- data/ext/zstdlib_c/extconf.rb +9 -4
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/ruby/zlib-3.3/zstdlib.c +5090 -0
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/adler32.c +5 -27
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/compress.c +5 -16
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/crc32.c +94 -161
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/deflate.c +362 -434
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/deflate.h +43 -12
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/gzclose.c +1 -3
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/gzguts.h +13 -18
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/gzlib.c +28 -85
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/gzread.c +23 -73
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/gzwrite.c +19 -65
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/infback.c +17 -30
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inffast.c +1 -4
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inffast.h +1 -1
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inflate.c +36 -102
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inftrees.c +6 -11
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inftrees.h +6 -6
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/trees.c +290 -355
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/uncompr.c +4 -12
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/zconf.h +23 -14
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/zlib.h +202 -199
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/zutil.c +18 -44
- data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/zutil.h +13 -33
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
- data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- metadata +108 -104
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
- /data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/crc32.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inffixed.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/inflate.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.12 → zlib-1.3.1}/trees.h +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,7 +14,9 @@
|
|
14
14
|
/*-*************************************
|
15
15
|
* Dependencies
|
16
16
|
***************************************/
|
17
|
+
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
|
17
18
|
#include "../common/zstd_internal.h"
|
19
|
+
#include "../common/portability_macros.h"
|
18
20
|
|
19
21
|
#if defined (__cplusplus)
|
20
22
|
extern "C" {
|
@@ -44,8 +46,9 @@ extern "C" {
|
|
44
46
|
***************************************/
|
45
47
|
typedef enum {
|
46
48
|
ZSTD_cwksp_alloc_objects,
|
47
|
-
|
48
|
-
ZSTD_cwksp_alloc_aligned
|
49
|
+
ZSTD_cwksp_alloc_aligned_init_once,
|
50
|
+
ZSTD_cwksp_alloc_aligned,
|
51
|
+
ZSTD_cwksp_alloc_buffers
|
49
52
|
} ZSTD_cwksp_alloc_phase_e;
|
50
53
|
|
51
54
|
/**
|
@@ -98,8 +101,8 @@ typedef enum {
|
|
98
101
|
*
|
99
102
|
* Workspace Layout:
|
100
103
|
*
|
101
|
-
* [ ... workspace ...
|
102
|
-
* [objects][tables
|
104
|
+
* [ ... workspace ... ]
|
105
|
+
* [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
|
103
106
|
*
|
104
107
|
* The various objects that live in the workspace are divided into the
|
105
108
|
* following categories, and are allocated separately:
|
@@ -123,9 +126,18 @@ typedef enum {
|
|
123
126
|
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
|
124
127
|
* Their sizes depend on the cparams. These tables are 64-byte aligned.
|
125
128
|
*
|
126
|
-
* -
|
127
|
-
*
|
128
|
-
*
|
129
|
+
* - Init once: these buffers must be initialized at least once before
|
130
|
+
* use. They should be used when we want to skip memory initialization
|
131
|
+
* while not triggering memory checkers (like Valgrind) when reading from
|
132
|
+
* this memory without writing to it first.
|
133
|
+
* These buffers should be used carefully as they might contain data
|
134
|
+
* from previous compressions.
|
135
|
+
* Buffers are aligned to 64 bytes.
|
136
|
+
*
|
137
|
+
* - Aligned: these buffers don't require any initialization before they're
|
138
|
+
* used. The user of the buffer should make sure they write into a buffer
|
139
|
+
* location before reading from it.
|
140
|
+
* Buffers are aligned to 64 bytes.
|
129
141
|
*
|
130
142
|
* - Buffers: these buffers are used for various purposes that don't require
|
131
143
|
* any alignment or initialization before they're used. This means they can
|
@@ -137,8 +149,9 @@ typedef enum {
|
|
137
149
|
* correctly packed into the workspace buffer. That order is:
|
138
150
|
*
|
139
151
|
* 1. Objects
|
140
|
-
* 2.
|
141
|
-
* 3. Aligned/Tables
|
152
|
+
* 2. Init once / Tables
|
153
|
+
* 3. Aligned / Tables
|
154
|
+
* 4. Buffers / Tables
|
142
155
|
*
|
143
156
|
* Attempts to reserve objects of different types out of order will fail.
|
144
157
|
*/
|
@@ -150,6 +163,7 @@ typedef struct {
|
|
150
163
|
void* tableEnd;
|
151
164
|
void* tableValidEnd;
|
152
165
|
void* allocStart;
|
166
|
+
void* initOnceStart;
|
153
167
|
|
154
168
|
BYTE allocFailed;
|
155
169
|
int workspaceOversizedDuration;
|
@@ -162,6 +176,7 @@ typedef struct {
|
|
162
176
|
***************************************/
|
163
177
|
|
164
178
|
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
|
179
|
+
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
|
165
180
|
|
166
181
|
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
167
182
|
(void)ws;
|
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
|
|
171
186
|
assert(ws->tableEnd <= ws->allocStart);
|
172
187
|
assert(ws->tableValidEnd <= ws->allocStart);
|
173
188
|
assert(ws->allocStart <= ws->workspaceEnd);
|
189
|
+
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
|
190
|
+
assert(ws->workspace <= ws->initOnceStart);
|
191
|
+
#if ZSTD_MEMORY_SANITIZER
|
192
|
+
{
|
193
|
+
intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
|
194
|
+
(U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
|
195
|
+
#if defined(ZSTD_MSAN_PRINT)
|
196
|
+
if(offset!=-1) {
|
197
|
+
__msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
|
198
|
+
}
|
199
|
+
#endif
|
200
|
+
assert(offset==-1);
|
201
|
+
};
|
202
|
+
#endif
|
174
203
|
}
|
175
204
|
|
176
205
|
/**
|
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
|
|
217
246
|
* for internal purposes (currently only alignment).
|
218
247
|
*/
|
219
248
|
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
|
220
|
-
/* For alignment, the wksp will always allocate an additional
|
221
|
-
* to align the beginning of tables section
|
222
|
-
* to align the beginning of the aligned section.
|
223
|
-
*
|
224
|
-
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
|
225
|
-
* aligneds being sized in multiples of 64 bytes.
|
249
|
+
/* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
|
250
|
+
* bytes to align the beginning of the tables section and the end of buffers.
|
226
251
|
*/
|
227
|
-
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
|
252
|
+
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
|
228
253
|
return slackSpace;
|
229
254
|
}
|
230
255
|
|
@@ -237,10 +262,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
|
|
237
262
|
size_t const alignBytesMask = alignBytes - 1;
|
238
263
|
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
|
239
264
|
assert((alignBytes & alignBytesMask) == 0);
|
240
|
-
assert(bytes
|
265
|
+
assert(bytes < alignBytes);
|
241
266
|
return bytes;
|
242
267
|
}
|
243
268
|
|
269
|
+
/**
|
270
|
+
* Returns the initial value for allocStart which is used to determine the position from
|
271
|
+
* which we can allocate from the end of the workspace.
|
272
|
+
*/
|
273
|
+
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
|
274
|
+
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
|
275
|
+
}
|
276
|
+
|
244
277
|
/**
|
245
278
|
* Internal function. Do not use directly.
|
246
279
|
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
|
@@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
|
|
281
314
|
{
|
282
315
|
assert(phase >= ws->phase);
|
283
316
|
if (phase > ws->phase) {
|
284
|
-
/* Going from allocating objects to allocating
|
285
|
-
if (ws->phase <
|
286
|
-
|
317
|
+
/* Going from allocating objects to allocating initOnce / tables */
|
318
|
+
if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
|
319
|
+
phase >= ZSTD_cwksp_alloc_aligned_init_once) {
|
287
320
|
ws->tableValidEnd = ws->objectEnd;
|
288
|
-
|
321
|
+
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
289
322
|
|
290
|
-
/* Going from allocating buffers to allocating aligneds/tables */
|
291
|
-
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
|
292
|
-
phase >= ZSTD_cwksp_alloc_aligned) {
|
293
|
-
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
|
294
|
-
size_t const bytesToAlign =
|
295
|
-
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
296
|
-
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
|
297
|
-
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
|
298
|
-
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
|
299
|
-
memory_allocation, "aligned phase - alignment initial allocation failed!");
|
300
|
-
}
|
301
323
|
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
|
302
|
-
void*
|
324
|
+
void *const alloc = ws->objectEnd;
|
303
325
|
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
304
|
-
void*
|
326
|
+
void *const objectEnd = (BYTE *) alloc + bytesToAlign;
|
305
327
|
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
|
306
328
|
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
|
307
329
|
"table phase - alignment initial allocation failed!");
|
@@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
|
|
309
331
|
ws->tableEnd = objectEnd; /* table area starts being empty */
|
310
332
|
if (ws->tableValidEnd < ws->tableEnd) {
|
311
333
|
ws->tableValidEnd = ws->tableEnd;
|
312
|
-
|
334
|
+
}
|
335
|
+
}
|
336
|
+
}
|
313
337
|
ws->phase = phase;
|
314
338
|
ZSTD_cwksp_assert_internal_consistency(ws);
|
315
339
|
}
|
@@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
|
|
321
345
|
*/
|
322
346
|
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
|
323
347
|
{
|
324
|
-
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr
|
348
|
+
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
|
325
349
|
}
|
326
350
|
|
327
351
|
/**
|
@@ -348,7 +372,9 @@ ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase
|
|
348
372
|
if (alloc) {
|
349
373
|
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
|
350
374
|
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
|
351
|
-
|
375
|
+
/* We need to keep the redzone poisoned while unpoisoning the bytes that
|
376
|
+
* are actually allocated. */
|
377
|
+
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
|
352
378
|
}
|
353
379
|
}
|
354
380
|
#endif
|
@@ -364,6 +390,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
|
|
364
390
|
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
|
365
391
|
}
|
366
392
|
|
393
|
+
/**
|
394
|
+
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
395
|
+
* This memory has been initialized at least once in the past.
|
396
|
+
* This doesn't mean it has been initialized this time, and it might contain data from previous
|
397
|
+
* operations.
|
398
|
+
* The main usage is for algorithms that might need read access into uninitialized memory.
|
399
|
+
* The algorithm must maintain safety under these conditions and must make sure it doesn't
|
400
|
+
* leak any of the past data (directly or in side channels).
|
401
|
+
*/
|
402
|
+
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
|
403
|
+
{
|
404
|
+
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
405
|
+
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
|
406
|
+
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
407
|
+
if(ptr && ptr < ws->initOnceStart) {
|
408
|
+
/* We assume the memory following the current allocation is either:
|
409
|
+
* 1. Not usable as initOnce memory (end of workspace)
|
410
|
+
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
|
411
|
+
* 3. An ASAN redzone, in which case we don't want to write on it
|
412
|
+
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
|
413
|
+
* Note that we assume here that MSAN and ASAN cannot run at the same time. */
|
414
|
+
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
|
415
|
+
ws->initOnceStart = ptr;
|
416
|
+
}
|
417
|
+
#if ZSTD_MEMORY_SANITIZER
|
418
|
+
assert(__msan_test_shadow(ptr, bytes) == -1);
|
419
|
+
#endif
|
420
|
+
return ptr;
|
421
|
+
}
|
422
|
+
|
367
423
|
/**
|
368
424
|
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
369
425
|
*/
|
@@ -382,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
|
|
382
438
|
*/
|
383
439
|
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
|
384
440
|
{
|
385
|
-
const ZSTD_cwksp_alloc_phase_e phase =
|
441
|
+
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
|
386
442
|
void* alloc;
|
387
443
|
void* end;
|
388
444
|
void* top;
|
389
445
|
|
390
|
-
|
391
|
-
|
446
|
+
/* We can only start allocating tables after we are done reserving space for objects at the
|
447
|
+
* start of the workspace */
|
448
|
+
if(ws->phase < phase) {
|
449
|
+
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
|
450
|
+
return NULL;
|
451
|
+
}
|
392
452
|
}
|
393
453
|
alloc = ws->tableEnd;
|
394
454
|
end = (BYTE *)alloc + bytes;
|
@@ -467,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
|
|
467
527
|
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
468
528
|
/* To validate that the table re-use logic is sound, and that we don't
|
469
529
|
* access table space that we haven't cleaned, we re-"poison" the table
|
470
|
-
* space every time we mark it dirty.
|
530
|
+
* space every time we mark it dirty.
|
531
|
+
* Since tableValidEnd space and initOnce space may overlap we don't poison
|
532
|
+
* the initOnce portion as that would break its promise. This means that this poisoning
|
533
|
+
* check isn't always applied fully. */
|
471
534
|
{
|
472
535
|
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
|
473
536
|
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
|
474
|
-
|
537
|
+
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
538
|
+
__msan_poison(ws->objectEnd, size);
|
539
|
+
} else {
|
540
|
+
assert(ws->initOnceStart >= ws->objectEnd);
|
541
|
+
__msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
|
542
|
+
}
|
475
543
|
}
|
476
544
|
#endif
|
477
545
|
|
@@ -499,7 +567,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
|
|
499
567
|
assert(ws->tableValidEnd >= ws->objectEnd);
|
500
568
|
assert(ws->tableValidEnd <= ws->allocStart);
|
501
569
|
if (ws->tableValidEnd < ws->tableEnd) {
|
502
|
-
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
|
570
|
+
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
|
503
571
|
}
|
504
572
|
ZSTD_cwksp_mark_tables_clean(ws);
|
505
573
|
}
|
@@ -536,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
|
536
604
|
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
537
605
|
/* To validate that the context re-use logic is sound, and that we don't
|
538
606
|
* access stuff that this compression hasn't initialized, we re-"poison"
|
539
|
-
* the workspace
|
540
|
-
*
|
607
|
+
* the workspace except for the areas in which we expect memory re-use
|
608
|
+
* without initialization (objects, valid tables area and init once
|
609
|
+
* memory). */
|
541
610
|
{
|
542
|
-
|
543
|
-
|
611
|
+
if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
|
612
|
+
size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
|
613
|
+
__msan_poison(ws->tableValidEnd, size);
|
614
|
+
}
|
544
615
|
}
|
545
616
|
#endif
|
546
617
|
|
@@ -556,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
|
|
556
627
|
#endif
|
557
628
|
|
558
629
|
ws->tableEnd = ws->objectEnd;
|
559
|
-
ws->allocStart = ws
|
630
|
+
ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
|
560
631
|
ws->allocFailed = 0;
|
561
|
-
if (ws->phase >
|
562
|
-
ws->phase =
|
632
|
+
if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
|
633
|
+
ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
|
563
634
|
}
|
564
635
|
ZSTD_cwksp_assert_internal_consistency(ws);
|
565
636
|
}
|
@@ -576,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
|
|
576
647
|
ws->workspaceEnd = (BYTE*)start + size;
|
577
648
|
ws->objectEnd = ws->workspace;
|
578
649
|
ws->tableValidEnd = ws->objectEnd;
|
650
|
+
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
|
579
651
|
ws->phase = ZSTD_cwksp_alloc_objects;
|
580
652
|
ws->isStatic = isStatic;
|
581
653
|
ZSTD_cwksp_clear(ws);
|
@@ -628,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
|
|
628
700
|
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
|
629
701
|
* actual amount of space used.
|
630
702
|
*/
|
631
|
-
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp*
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
} else {
|
637
|
-
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
|
638
|
-
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
|
639
|
-
*/
|
640
|
-
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
|
641
|
-
}
|
703
|
+
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
|
704
|
+
/* We have an alignment space between objects and tables and between tables and buffers, so we can have up to twice
|
705
|
+
* the alignment bytes difference between estimation and actual usage */
|
706
|
+
return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
|
707
|
+
ZSTD_cwksp_used(ws) <= estimatedSpace;
|
642
708
|
}
|
643
709
|
|
644
710
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -11,8 +11,43 @@
|
|
11
11
|
#include "zstd_compress_internal.h"
|
12
12
|
#include "zstd_double_fast.h"
|
13
13
|
|
14
|
+
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
|
15
|
+
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
16
|
+
{
|
17
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
18
|
+
U32* const hashLarge = ms->hashTable;
|
19
|
+
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
20
|
+
U32 const mls = cParams->minMatch;
|
21
|
+
U32* const hashSmall = ms->chainTable;
|
22
|
+
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
23
|
+
const BYTE* const base = ms->window.base;
|
24
|
+
const BYTE* ip = base + ms->nextToUpdate;
|
25
|
+
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
26
|
+
const U32 fastHashFillStep = 3;
|
14
27
|
|
15
|
-
|
28
|
+
/* Always insert every fastHashFillStep position into the hash tables.
|
29
|
+
* Insert the other positions into the large hash table if their entry
|
30
|
+
* is empty.
|
31
|
+
*/
|
32
|
+
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
33
|
+
U32 const curr = (U32)(ip - base);
|
34
|
+
U32 i;
|
35
|
+
for (i = 0; i < fastHashFillStep; ++i) {
|
36
|
+
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
37
|
+
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
38
|
+
if (i == 0) {
|
39
|
+
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
|
40
|
+
}
|
41
|
+
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
|
42
|
+
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
|
43
|
+
}
|
44
|
+
/* Only load extra positions for ZSTD_dtlm_full */
|
45
|
+
if (dtlm == ZSTD_dtlm_fast)
|
46
|
+
break;
|
47
|
+
} }
|
48
|
+
}
|
49
|
+
|
50
|
+
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
|
16
51
|
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
17
52
|
{
|
18
53
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
@@ -43,7 +78,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
43
78
|
/* Only load extra positions for ZSTD_dtlm_full */
|
44
79
|
if (dtlm == ZSTD_dtlm_fast)
|
45
80
|
break;
|
46
|
-
|
81
|
+
} }
|
82
|
+
}
|
83
|
+
|
84
|
+
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
85
|
+
const void* const end,
|
86
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
87
|
+
ZSTD_tableFillPurpose_e tfp)
|
88
|
+
{
|
89
|
+
if (tfp == ZSTD_tfp_forCDict) {
|
90
|
+
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
|
91
|
+
} else {
|
92
|
+
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
|
93
|
+
}
|
47
94
|
}
|
48
95
|
|
49
96
|
|
@@ -67,7 +114,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
67
114
|
const BYTE* const iend = istart + srcSize;
|
68
115
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
69
116
|
U32 offset_1=rep[0], offset_2=rep[1];
|
70
|
-
U32
|
117
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
71
118
|
|
72
119
|
size_t mLength;
|
73
120
|
U32 offset;
|
@@ -100,8 +147,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
100
147
|
U32 const current = (U32)(ip - base);
|
101
148
|
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
102
149
|
U32 const maxRep = current - windowLow;
|
103
|
-
if (offset_2 > maxRep)
|
104
|
-
if (offset_1 > maxRep)
|
150
|
+
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
151
|
+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
105
152
|
}
|
106
153
|
|
107
154
|
/* Outer Loop: one iteration per match found and stored */
|
@@ -131,7 +178,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
131
178
|
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
132
179
|
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
133
180
|
ip++;
|
134
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
181
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
135
182
|
goto _match_stored;
|
136
183
|
}
|
137
184
|
|
@@ -175,9 +222,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
175
222
|
} while (ip1 <= ilimit);
|
176
223
|
|
177
224
|
_cleanup:
|
225
|
+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
226
|
+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
227
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
228
|
+
|
178
229
|
/* save reps for next block */
|
179
|
-
rep[0] = offset_1 ? offset_1 :
|
180
|
-
rep[1] = offset_2 ? offset_2 :
|
230
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
231
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
181
232
|
|
182
233
|
/* Return the last literals size */
|
183
234
|
return (size_t)(iend - anchor);
|
@@ -217,7 +268,7 @@ _match_found: /* requires ip, offset, mLength */
|
|
217
268
|
hashLong[hl1] = (U32)(ip1 - base);
|
218
269
|
}
|
219
270
|
|
220
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
271
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
221
272
|
|
222
273
|
_match_stored:
|
223
274
|
/* match found */
|
@@ -243,7 +294,7 @@ _match_stored:
|
|
243
294
|
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
244
295
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
245
296
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
246
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
297
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
247
298
|
ip += rLength;
|
248
299
|
anchor = ip;
|
249
300
|
continue; /* faster when present ... (?) */
|
@@ -275,7 +326,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
275
326
|
const BYTE* const iend = istart + srcSize;
|
276
327
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
277
328
|
U32 offset_1=rep[0], offset_2=rep[1];
|
278
|
-
U32 offsetSaved = 0;
|
279
329
|
|
280
330
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
281
331
|
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
@@ -286,8 +336,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
286
336
|
const BYTE* const dictStart = dictBase + dictStartIndex;
|
287
337
|
const BYTE* const dictEnd = dms->window.nextSrc;
|
288
338
|
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
289
|
-
const U32 dictHBitsL = dictCParams->hashLog;
|
290
|
-
const U32 dictHBitsS = dictCParams->chainLog;
|
339
|
+
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
340
|
+
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
291
341
|
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
292
342
|
|
293
343
|
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
@@ -295,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
295
345
|
/* if a dictionary is attached, it must be within window range */
|
296
346
|
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
297
347
|
|
348
|
+
if (ms->prefetchCDictTables) {
|
349
|
+
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
350
|
+
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
351
|
+
PREFETCH_AREA(dictHashLong, hashTableBytes)
|
352
|
+
PREFETCH_AREA(dictHashSmall, chainTableBytes)
|
353
|
+
}
|
354
|
+
|
298
355
|
/* init */
|
299
356
|
ip += (dictAndPrefixLength == 0);
|
300
357
|
|
@@ -309,8 +366,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
309
366
|
U32 offset;
|
310
367
|
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
311
368
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
312
|
-
size_t const
|
313
|
-
size_t const
|
369
|
+
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
370
|
+
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
371
|
+
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
|
372
|
+
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
|
373
|
+
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
|
374
|
+
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
|
314
375
|
U32 const curr = (U32)(ip-base);
|
315
376
|
U32 const matchIndexL = hashLong[h2];
|
316
377
|
U32 matchIndexS = hashSmall[h];
|
@@ -328,7 +389,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
328
389
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
329
390
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
330
391
|
ip++;
|
331
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
392
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
332
393
|
goto _match_stored;
|
333
394
|
}
|
334
395
|
|
@@ -340,9 +401,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
340
401
|
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
341
402
|
goto _match_found;
|
342
403
|
}
|
343
|
-
} else {
|
404
|
+
} else if (dictTagsMatchL) {
|
344
405
|
/* check dictMatchState long match */
|
345
|
-
U32 const dictMatchIndexL =
|
406
|
+
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
346
407
|
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
347
408
|
assert(dictMatchL < dictEnd);
|
348
409
|
|
@@ -358,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
358
419
|
if (MEM_read32(match) == MEM_read32(ip)) {
|
359
420
|
goto _search_next_long;
|
360
421
|
}
|
361
|
-
} else {
|
422
|
+
} else if (dictTagsMatchS) {
|
362
423
|
/* check dictMatchState short match */
|
363
|
-
U32 const dictMatchIndexS =
|
424
|
+
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
|
364
425
|
match = dictBase + dictMatchIndexS;
|
365
426
|
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
366
427
|
|
@@ -375,10 +436,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
375
436
|
continue;
|
376
437
|
|
377
438
|
_search_next_long:
|
378
|
-
|
379
439
|
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
380
|
-
size_t const
|
440
|
+
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
381
441
|
U32 const matchIndexL3 = hashLong[hl3];
|
442
|
+
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
443
|
+
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
|
382
444
|
const BYTE* matchL3 = base + matchIndexL3;
|
383
445
|
hashLong[hl3] = curr + 1;
|
384
446
|
|
@@ -391,9 +453,9 @@ _search_next_long:
|
|
391
453
|
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
392
454
|
goto _match_found;
|
393
455
|
}
|
394
|
-
} else {
|
456
|
+
} else if (dictTagsMatchL3) {
|
395
457
|
/* check dict long +1 match */
|
396
|
-
U32 const dictMatchIndexL3 =
|
458
|
+
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
397
459
|
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
398
460
|
assert(dictMatchL3 < dictEnd);
|
399
461
|
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
@@ -419,7 +481,7 @@ _match_found:
|
|
419
481
|
offset_2 = offset_1;
|
420
482
|
offset_1 = offset;
|
421
483
|
|
422
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
484
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
423
485
|
|
424
486
|
_match_stored:
|
425
487
|
/* match found */
|
@@ -448,7 +510,7 @@ _match_stored:
|
|
448
510
|
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
449
511
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
450
512
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
451
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
513
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
452
514
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
453
515
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
454
516
|
ip += repLength2;
|
@@ -461,8 +523,8 @@ _match_stored:
|
|
461
523
|
} /* while (ip < ilimit) */
|
462
524
|
|
463
525
|
/* save reps for next block */
|
464
|
-
rep[0] = offset_1
|
465
|
-
rep[1] = offset_2
|
526
|
+
rep[0] = offset_1;
|
527
|
+
rep[1] = offset_2;
|
466
528
|
|
467
529
|
/* Return the last literals size */
|
468
530
|
return (size_t)(iend - anchor);
|
@@ -585,7 +647,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
585
647
|
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
586
648
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
587
649
|
ip++;
|
588
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
650
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
589
651
|
} else {
|
590
652
|
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
591
653
|
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
@@ -596,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
596
658
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
597
659
|
offset_2 = offset_1;
|
598
660
|
offset_1 = offset;
|
599
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
661
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
600
662
|
|
601
663
|
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
602
664
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
@@ -621,7 +683,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
621
683
|
}
|
622
684
|
offset_2 = offset_1;
|
623
685
|
offset_1 = offset;
|
624
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
686
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
625
687
|
|
626
688
|
} else {
|
627
689
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
@@ -653,7 +715,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
653
715
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
654
716
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
655
717
|
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
656
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
718
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
657
719
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
658
720
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
659
721
|
ip += repLength2;
|