zstd-ruby 1.5.2.3 → 1.5.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/zstdruby/libzstd/common/bits.h +175 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +18 -59
- data/ext/zstdruby/libzstd/common/compiler.h +22 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +1 -1
- data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
- data/ext/zstdruby/libzstd/common/error_private.c +9 -2
- data/ext/zstdruby/libzstd/common/error_private.h +1 -1
- data/ext/zstdruby/libzstd/common/fse.h +5 -83
- data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
- data/ext/zstdruby/libzstd/common/huf.h +65 -156
- data/ext/zstdruby/libzstd/common/mem.h +39 -46
- data/ext/zstdruby/libzstd/common/pool.c +26 -10
- data/ext/zstdruby/libzstd/common/pool.h +7 -1
- data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
- data/ext/zstdruby/libzstd/common/threading.c +68 -14
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
- data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -113
- data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
- data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
- data/ext/zstdruby/libzstd/compress/hist.c +1 -1
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1055 -455
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +165 -145
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +5 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +306 -283
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +104 -80
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +12 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +3 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +164 -42
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +186 -65
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -87
- data/ext/zstdruby/libzstd/zdict.h +53 -31
- data/ext/zstdruby/libzstd/zstd.h +489 -90
- data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
- data/ext/zstdruby/main.c +4 -0
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -20,12 +20,12 @@
|
|
20
20
|
#include "../common/mem.h" /* low level memory routines */
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
22
22
|
#include "../common/fse.h"
|
23
|
-
#define HUF_STATIC_LINKING_ONLY
|
24
23
|
#include "../common/huf.h"
|
25
24
|
#include "../common/zstd_internal.h"
|
26
25
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
27
26
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
28
27
|
#include "zstd_decompress_block.h"
|
28
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
29
29
|
|
30
30
|
/*_*******************************************************
|
31
31
|
* Macros
|
@@ -89,7 +89,7 @@ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const
|
|
89
89
|
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
90
90
|
}
|
91
91
|
else {
|
92
|
-
/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
|
92
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
|
93
93
|
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
94
94
|
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
95
95
|
}
|
@@ -134,13 +134,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
134
134
|
ZSTD_FALLTHROUGH;
|
135
135
|
|
136
136
|
case set_compressed:
|
137
|
-
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE ==
|
137
|
+
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
|
138
138
|
{ size_t lhSize, litSize, litCSize;
|
139
139
|
U32 singleStream=0;
|
140
140
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
141
141
|
U32 const lhc = MEM_readLE32(istart);
|
142
142
|
size_t hufSuccess;
|
143
143
|
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
144
|
+
int const flags = 0
|
145
|
+
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
|
146
|
+
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
|
144
147
|
switch(lhlCode)
|
145
148
|
{
|
146
149
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -165,6 +168,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
165
168
|
}
|
166
169
|
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
167
170
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
171
|
+
if (!singleStream)
|
172
|
+
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
|
173
|
+
"Not enough literals (%zu) for the 4-streams mode (min %u)",
|
174
|
+
litSize, MIN_LITERALS_FOR_4_STREAMS);
|
168
175
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
169
176
|
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
170
177
|
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
@@ -176,13 +183,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
176
183
|
|
177
184
|
if (litEncType==set_repeat) {
|
178
185
|
if (singleStream) {
|
179
|
-
hufSuccess =
|
186
|
+
hufSuccess = HUF_decompress1X_usingDTable(
|
180
187
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
181
|
-
dctx->HUFptr,
|
188
|
+
dctx->HUFptr, flags);
|
182
189
|
} else {
|
183
|
-
|
190
|
+
assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
|
191
|
+
hufSuccess = HUF_decompress4X_usingDTable(
|
184
192
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
185
|
-
dctx->HUFptr,
|
193
|
+
dctx->HUFptr, flags);
|
186
194
|
}
|
187
195
|
} else {
|
188
196
|
if (singleStream) {
|
@@ -190,18 +198,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
190
198
|
hufSuccess = HUF_decompress1X_DCtx_wksp(
|
191
199
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
192
200
|
istart+lhSize, litCSize, dctx->workspace,
|
193
|
-
sizeof(dctx->workspace));
|
201
|
+
sizeof(dctx->workspace), flags);
|
194
202
|
#else
|
195
|
-
hufSuccess =
|
203
|
+
hufSuccess = HUF_decompress1X1_DCtx_wksp(
|
196
204
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
197
205
|
istart+lhSize, litCSize, dctx->workspace,
|
198
|
-
sizeof(dctx->workspace),
|
206
|
+
sizeof(dctx->workspace), flags);
|
199
207
|
#endif
|
200
208
|
} else {
|
201
|
-
hufSuccess =
|
209
|
+
hufSuccess = HUF_decompress4X_hufOnly_wksp(
|
202
210
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
203
211
|
istart+lhSize, litCSize, dctx->workspace,
|
204
|
-
sizeof(dctx->workspace),
|
212
|
+
sizeof(dctx->workspace), flags);
|
205
213
|
}
|
206
214
|
}
|
207
215
|
if (dctx->litBufferLocation == ZSTD_split)
|
@@ -237,6 +245,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
237
245
|
break;
|
238
246
|
case 3:
|
239
247
|
lhSize = 3;
|
248
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
|
240
249
|
litSize = MEM_readLE24(istart) >> 4;
|
241
250
|
break;
|
242
251
|
}
|
@@ -279,12 +288,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
279
288
|
break;
|
280
289
|
case 1:
|
281
290
|
lhSize = 2;
|
291
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
|
282
292
|
litSize = MEM_readLE16(istart) >> 4;
|
283
293
|
break;
|
284
294
|
case 3:
|
285
295
|
lhSize = 3;
|
296
|
+
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
|
286
297
|
litSize = MEM_readLE24(istart) >> 4;
|
287
|
-
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
288
298
|
break;
|
289
299
|
}
|
290
300
|
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
@@ -506,14 +516,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
506
516
|
for (i = 8; i < n; i += 8) {
|
507
517
|
MEM_write64(spread + pos + i, sv);
|
508
518
|
}
|
509
|
-
|
519
|
+
assert(n>=0);
|
520
|
+
pos += (size_t)n;
|
510
521
|
}
|
511
522
|
}
|
512
523
|
/* Now we spread those positions across the table.
|
513
|
-
* The benefit of doing it in two stages is that we avoid the
|
524
|
+
* The benefit of doing it in two stages is that we avoid the
|
514
525
|
* variable size inner loop, which caused lots of branch misses.
|
515
526
|
* Now we can run through all the positions without any branch misses.
|
516
|
-
* We unroll the loop twice, since that is what
|
527
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
517
528
|
*/
|
518
529
|
{
|
519
530
|
size_t position = 0;
|
@@ -540,7 +551,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
540
551
|
for (i=0; i<n; i++) {
|
541
552
|
tableDecode[position].baseValue = s;
|
542
553
|
position = (position + step) & tableMask;
|
543
|
-
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
554
|
+
while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
|
544
555
|
} }
|
545
556
|
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
|
546
557
|
}
|
@@ -551,7 +562,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
551
562
|
for (u=0; u<tableSize; u++) {
|
552
563
|
U32 const symbol = tableDecode[u].baseValue;
|
553
564
|
U32 const nextState = symbolNext[symbol]++;
|
554
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
565
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
555
566
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
556
567
|
assert(nbAdditionalBits[symbol] < 255);
|
557
568
|
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
@@ -964,6 +975,11 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
964
975
|
|
965
976
|
assert(op != NULL /* Precondition */);
|
966
977
|
assert(oend_w < oend /* No underflow */);
|
978
|
+
|
979
|
+
#if defined(__aarch64__)
|
980
|
+
/* prefetch sequence starting from match that will be used for copy later */
|
981
|
+
PREFETCH_L1(match);
|
982
|
+
#endif
|
967
983
|
/* Handle edge cases in a slow path:
|
968
984
|
* - Read beyond end of literals
|
969
985
|
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
@@ -1154,7 +1170,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
|
|
1154
1170
|
}
|
1155
1171
|
|
1156
1172
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
1157
|
-
* offset bits. But we can only read at most
|
1173
|
+
* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
|
1158
1174
|
* bits before reloading. This value is the maximum number of bytes we read
|
1159
1175
|
* after reloading when we are decoding long offsets.
|
1160
1176
|
*/
|
@@ -1169,9 +1185,27 @@ FORCE_INLINE_TEMPLATE seq_t
|
|
1169
1185
|
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
1170
1186
|
{
|
1171
1187
|
seq_t seq;
|
1188
|
+
/*
|
1189
|
+
* ZSTD_seqSymbol is a structure with a total width of 64 bits. So it can be
|
1190
|
+
* loaded in one operation, and its fields extracted by simply shifting or
|
1191
|
+
* bit-extracting on aarch64.
|
1192
|
+
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
|
1193
|
+
* operations that cause performance drop. This can be avoided by using this
|
1194
|
+
* ZSTD_memcpy hack.
|
1195
|
+
*/
|
1196
|
+
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
|
1197
|
+
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
|
1198
|
+
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
|
1199
|
+
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
|
1200
|
+
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
|
1201
|
+
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
|
1202
|
+
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
|
1203
|
+
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
|
1204
|
+
#else
|
1172
1205
|
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
1173
1206
|
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
1174
1207
|
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
1208
|
+
#endif
|
1175
1209
|
seq.matchLength = mlDInfo->baseValue;
|
1176
1210
|
seq.litLength = llDInfo->baseValue;
|
1177
1211
|
{ U32 const ofBase = ofDInfo->baseValue;
|
@@ -1186,9 +1220,13 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
1186
1220
|
U32 const llnbBits = llDInfo->nbBits;
|
1187
1221
|
U32 const mlnbBits = mlDInfo->nbBits;
|
1188
1222
|
U32 const ofnbBits = ofDInfo->nbBits;
|
1223
|
+
|
1224
|
+
assert(llBits <= MaxLLBits);
|
1225
|
+
assert(mlBits <= MaxMLBits);
|
1226
|
+
assert(ofBits <= MaxOff);
|
1189
1227
|
/*
|
1190
1228
|
* As gcc has better branch and block analyzers, sometimes it is only
|
1191
|
-
* valuable to mark
|
1229
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
1192
1230
|
* performance.
|
1193
1231
|
*/
|
1194
1232
|
|
@@ -1201,13 +1239,16 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
1201
1239
|
#endif
|
1202
1240
|
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
1203
1241
|
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
1204
|
-
|
1242
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
|
1243
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
|
1205
1244
|
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
1206
|
-
|
1245
|
+
/* Always read extra bits, this keeps the logic simple,
|
1246
|
+
* avoids branches, and avoids accidentally reading 0 bits.
|
1247
|
+
*/
|
1248
|
+
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
|
1207
1249
|
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
1208
1250
|
BIT_reloadDStream(&seqState->DStream);
|
1209
|
-
|
1210
|
-
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
|
1251
|
+
offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
1211
1252
|
} else {
|
1212
1253
|
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
1213
1254
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
@@ -1552,7 +1593,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
|
1552
1593
|
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
1553
1594
|
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
1554
1595
|
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
1555
|
-
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
1596
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
|
1556
1597
|
(void)frame;
|
1557
1598
|
|
1558
1599
|
/* Regen sequences */
|
@@ -1945,34 +1986,79 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1945
1986
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1946
1987
|
|
1947
1988
|
|
1989
|
+
/**
|
1990
|
+
* @returns The total size of the history referencable by zstd, including
|
1991
|
+
* both the prefix and the extDict. At @p op any offset larger than this
|
1992
|
+
* is invalid.
|
1993
|
+
*/
|
1994
|
+
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
|
1995
|
+
{
|
1996
|
+
return (size_t)(op - virtualStart);
|
1997
|
+
}
|
1948
1998
|
|
1949
|
-
|
1950
|
-
|
1951
|
-
|
1999
|
+
typedef struct {
|
2000
|
+
unsigned longOffsetShare;
|
2001
|
+
unsigned maxNbAdditionalBits;
|
2002
|
+
} ZSTD_OffsetInfo;
|
2003
|
+
|
2004
|
+
/* ZSTD_getOffsetInfo() :
|
1952
2005
|
* condition : offTable must be valid
|
1953
2006
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
1954
|
-
* compared to maximum possible of (1<<OffFSELog)
|
1955
|
-
|
1956
|
-
|
2007
|
+
* compared to maximum possible of (1<<OffFSELog),
|
2008
|
+
* as well as the maximum number of additional bits required.
|
2009
|
+
*/
|
2010
|
+
static ZSTD_OffsetInfo
|
2011
|
+
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
|
1957
2012
|
{
|
1958
|
-
|
1959
|
-
|
1960
|
-
|
1961
|
-
|
1962
|
-
|
1963
|
-
|
1964
|
-
|
1965
|
-
|
1966
|
-
|
1967
|
-
|
2013
|
+
ZSTD_OffsetInfo info = {0, 0};
|
2014
|
+
/* If nbSeq == 0, then the offTable is uninitialized, but we have
|
2015
|
+
* no sequences, so both values should be 0.
|
2016
|
+
*/
|
2017
|
+
if (nbSeq != 0) {
|
2018
|
+
const void* ptr = offTable;
|
2019
|
+
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
|
2020
|
+
const ZSTD_seqSymbol* table = offTable + 1;
|
2021
|
+
U32 const max = 1 << tableLog;
|
2022
|
+
U32 u;
|
2023
|
+
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
|
2024
|
+
|
2025
|
+
assert(max <= (1 << OffFSELog)); /* max not too large */
|
2026
|
+
for (u=0; u<max; u++) {
|
2027
|
+
info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
|
2028
|
+
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
|
2029
|
+
}
|
2030
|
+
|
2031
|
+
assert(tableLog <= OffFSELog);
|
2032
|
+
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
|
1968
2033
|
}
|
1969
2034
|
|
1970
|
-
|
1971
|
-
|
2035
|
+
return info;
|
2036
|
+
}
|
1972
2037
|
|
1973
|
-
|
2038
|
+
/**
|
2039
|
+
* @returns The maximum offset we can decode in one read of our bitstream, without
|
2040
|
+
* reloading more bits in the middle of the offset bits read. Any offsets larger
|
2041
|
+
* than this must use the long offset decoder.
|
2042
|
+
*/
|
2043
|
+
static size_t ZSTD_maxShortOffset(void)
|
2044
|
+
{
|
2045
|
+
if (MEM_64bits()) {
|
2046
|
+
/* We can decode any offset without reloading bits.
|
2047
|
+
* This might change if the max window size grows.
|
2048
|
+
*/
|
2049
|
+
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
2050
|
+
return (size_t)-1;
|
2051
|
+
} else {
|
2052
|
+
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
|
2053
|
+
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
|
2054
|
+
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
|
2055
|
+
*/
|
2056
|
+
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
|
2057
|
+
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
|
2058
|
+
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
|
2059
|
+
return maxOffset;
|
2060
|
+
}
|
1974
2061
|
}
|
1975
|
-
#endif
|
1976
2062
|
|
1977
2063
|
size_t
|
1978
2064
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
@@ -1980,20 +2066,21 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1980
2066
|
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
|
1981
2067
|
{ /* blockType == blockCompressed */
|
1982
2068
|
const BYTE* ip = (const BYTE*)src;
|
1983
|
-
/* isLongOffset must be true if there are long offsets.
|
1984
|
-
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
|
1985
|
-
* We don't expect that to be the case in 64-bit mode.
|
1986
|
-
* In block mode, window size is not known, so we have to be conservative.
|
1987
|
-
* (note: but it could be evaluated from current-lowLimit)
|
1988
|
-
*/
|
1989
|
-
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
1990
2069
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
1991
2070
|
|
1992
|
-
|
2071
|
+
/* Note : the wording of the specification
|
2072
|
+
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
|
2073
|
+
* This generally does not happen, as it makes little sense,
|
2074
|
+
* since an uncompressed block would feature same size and have no decompression cost.
|
2075
|
+
* Also, note that the decoder from reference libzstd before v1.5.4
|
2076
|
+
* would consider this edge case as an error.
|
2077
|
+
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
|
2078
|
+
* for broader compatibility with the deployed ecosystem of zstd decoders */
|
2079
|
+
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
1993
2080
|
|
1994
2081
|
/* Decode literals section */
|
1995
2082
|
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
1996
|
-
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock :
|
2083
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
|
1997
2084
|
if (ZSTD_isError(litCSize)) return litCSize;
|
1998
2085
|
ip += litCSize;
|
1999
2086
|
srcSize -= litCSize;
|
@@ -2001,6 +2088,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
2001
2088
|
|
2002
2089
|
/* Build Decoding Tables */
|
2003
2090
|
{
|
2091
|
+
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
|
2092
|
+
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
|
2093
|
+
*/
|
2094
|
+
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
|
2095
|
+
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
|
2096
|
+
/* isLongOffset must be true if there are long offsets.
|
2097
|
+
* Offsets are long if they are larger than ZSTD_maxShortOffset().
|
2098
|
+
* We don't expect that to be the case in 64-bit mode.
|
2099
|
+
*
|
2100
|
+
* We check here to see if our history is large enough to allow long offsets.
|
2101
|
+
* If it isn't, then we can't possibly have (valid) long offsets. If the offset
|
2102
|
+
* is invalid, then it is okay to read it incorrectly.
|
2103
|
+
*
|
2104
|
+
* If isLongOffset is true, then we will later check our decoding table to see
|
2105
|
+
* if it is even possible to generate long offsets.
|
2106
|
+
*/
|
2107
|
+
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
|
2004
2108
|
/* These macros control at build-time which decompressor implementation
|
2005
2109
|
* we use. If neither is defined, we do some inspection and dispatch at
|
2006
2110
|
* runtime.
|
@@ -2008,6 +2112,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
2008
2112
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
2009
2113
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
2010
2114
|
int usePrefetchDecoder = dctx->ddictIsCold;
|
2115
|
+
#else
|
2116
|
+
/* Set to 1 to avoid computing offset info if we don't need to.
|
2117
|
+
* Otherwise this value is ignored.
|
2118
|
+
*/
|
2119
|
+
int usePrefetchDecoder = 1;
|
2011
2120
|
#endif
|
2012
2121
|
int nbSeq;
|
2013
2122
|
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
@@ -2017,26 +2126,38 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
2017
2126
|
|
2018
2127
|
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
2019
2128
|
|
2020
|
-
|
2021
|
-
|
2022
|
-
|
2023
|
-
|
2024
|
-
|
2025
|
-
|
2026
|
-
|
2027
|
-
|
2129
|
+
/* If we could potentially have long offsets, or we might want to use the prefetch decoder,
|
2130
|
+
* compute information about the share of long offsets, and the maximum nbAdditionalBits.
|
2131
|
+
* NOTE: could probably use a larger nbSeq limit
|
2132
|
+
*/
|
2133
|
+
if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
|
2134
|
+
ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
|
2135
|
+
if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
|
2136
|
+
/* If isLongOffset, but the maximum number of additional bits that we see in our table is small
|
2137
|
+
* enough, then we know it is impossible to have too long an offset in this block, so we can
|
2138
|
+
* use the regular offset decoder.
|
2139
|
+
*/
|
2140
|
+
isLongOffset = ZSTD_lo_isRegularOffset;
|
2141
|
+
}
|
2142
|
+
if (!usePrefetchDecoder) {
|
2143
|
+
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
2144
|
+
usePrefetchDecoder = (info.longOffsetShare >= minShare);
|
2145
|
+
}
|
2028
2146
|
}
|
2029
|
-
#endif
|
2030
2147
|
|
2031
2148
|
dctx->ddictIsCold = 0;
|
2032
2149
|
|
2033
2150
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
2034
2151
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
2035
|
-
if (usePrefetchDecoder)
|
2152
|
+
if (usePrefetchDecoder) {
|
2153
|
+
#else
|
2154
|
+
(void)usePrefetchDecoder;
|
2155
|
+
{
|
2036
2156
|
#endif
|
2037
2157
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
2038
2158
|
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
2039
2159
|
#endif
|
2160
|
+
}
|
2040
2161
|
|
2041
2162
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
2042
2163
|
/* else */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
|
75
75
|
|
76
76
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
77
77
|
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
78
|
+
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
78
79
|
|
79
80
|
typedef struct {
|
80
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
81
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
82
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
83
|
-
HUF_DTable hufTable[HUF_DTABLE_SIZE(
|
84
|
+
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
84
85
|
U32 rep[ZSTD_REP_NUM];
|
85
86
|
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
86
87
|
} ZSTD_entropyDTables_t;
|
@@ -164,6 +165,7 @@ struct ZSTD_DCtx_s
|
|
164
165
|
ZSTD_dictUses_e dictUses;
|
165
166
|
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
166
167
|
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
168
|
+
int disableHufAsm;
|
167
169
|
|
168
170
|
/* streaming */
|
169
171
|
ZSTD_dStreamStage streamStage;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -34,6 +34,7 @@
|
|
34
34
|
#include "../common/pool.h"
|
35
35
|
#include "../common/threading.h"
|
36
36
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
37
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
37
38
|
#include "../zdict.h"
|
38
39
|
#include "cover.h"
|
39
40
|
|
@@ -541,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
541
542
|
|
542
543
|
/**
|
543
544
|
* Prepare a context for dictionary building.
|
544
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
545
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
545
546
|
* times.
|
546
547
|
* Returns 0 on success or error code on error.
|
547
548
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
@@ -646,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
646
647
|
|
647
648
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
648
649
|
{
|
649
|
-
const double ratio = (double)nbDmers / maxDictSize;
|
650
|
+
const double ratio = (double)nbDmers / (double)maxDictSize;
|
650
651
|
if (ratio >= 10) {
|
651
652
|
return;
|
652
653
|
}
|
@@ -950,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
950
951
|
}
|
951
952
|
}
|
952
953
|
|
954
|
+
static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
|
955
|
+
{
|
956
|
+
COVER_dictSelection_t ds;
|
957
|
+
ds.dictContent = buf;
|
958
|
+
ds.dictSize = s;
|
959
|
+
ds.totalCompressedSize = csz;
|
960
|
+
return ds;
|
961
|
+
}
|
962
|
+
|
953
963
|
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
954
|
-
|
955
|
-
return selection;
|
964
|
+
return setDictSelection(NULL, 0, error);
|
956
965
|
}
|
957
966
|
|
958
967
|
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
@@ -1005,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
1005
1014
|
}
|
1006
1015
|
|
1007
1016
|
if (params.shrinkDict == 0) {
|
1008
|
-
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
1009
1017
|
free(candidateDictBuffer);
|
1010
|
-
return
|
1018
|
+
return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
|
1011
1019
|
}
|
1012
1020
|
|
1013
1021
|
largestDict = dictContentSize;
|
@@ -1039,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
|
|
1039
1047
|
return COVER_dictSelectionError(totalCompressedSize);
|
1040
1048
|
}
|
1041
1049
|
|
1042
|
-
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
1043
|
-
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
1050
|
+
if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
|
1044
1051
|
free(largestDictbuffer);
|
1045
|
-
return
|
1052
|
+
return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
|
1046
1053
|
}
|
1047
1054
|
dictContentSize *= 2;
|
1048
1055
|
}
|
1049
1056
|
dictContentSize = largestDict;
|
1050
1057
|
totalCompressedSize = largestCompressed;
|
1051
|
-
|
1052
|
-
|
1053
|
-
free(candidateDictBuffer);
|
1054
|
-
return selection;
|
1055
|
-
}
|
1058
|
+
free(candidateDictBuffer);
|
1059
|
+
return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
|
1056
1060
|
}
|
1057
1061
|
|
1058
1062
|
/**
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -304,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
304
304
|
|
305
305
|
/**
|
306
306
|
* Prepare a context for dictionary building.
|
307
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
308
308
|
* times.
|
309
309
|
* Returns 0 on success or error code on error.
|
310
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|