zstd-ruby 1.4.4.0 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,9 +15,9 @@
|
|
15
15
|
/*-*******************************************************
|
16
16
|
* Dependencies
|
17
17
|
*********************************************************/
|
18
|
-
#include
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
22
22
|
|
23
23
|
|
@@ -33,6 +33,12 @@
|
|
33
33
|
*/
|
34
34
|
|
35
35
|
|
36
|
+
/* Streaming state is used to inform allocation of the literal buffer */
|
37
|
+
typedef enum {
|
38
|
+
not_streaming = 0,
|
39
|
+
is_streaming = 1
|
40
|
+
} streaming_operation;
|
41
|
+
|
36
42
|
/* ZSTD_decompressBlock_internal() :
|
37
43
|
* decompress block, starting at `src`,
|
38
44
|
* into destination buffer `dst`.
|
@@ -41,19 +47,27 @@
|
|
41
47
|
*/
|
42
48
|
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
43
49
|
void* dst, size_t dstCapacity,
|
44
|
-
const void* src, size_t srcSize, const int frame);
|
50
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
|
45
51
|
|
46
52
|
/* ZSTD_buildFSETable() :
|
47
53
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
48
54
|
* this function must be called with valid parameters only
|
49
55
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
50
56
|
* in which case it cannot fail.
|
57
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
58
|
+
* defined in zstd_decompress_internal.h.
|
51
59
|
* Internal use only.
|
52
60
|
*/
|
53
61
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
54
62
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
55
|
-
const U32* baseValue, const
|
56
|
-
unsigned tableLog
|
63
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
64
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
65
|
+
int bmi2);
|
66
|
+
|
67
|
+
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
|
68
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
69
|
+
void* dst, size_t dstCapacity,
|
70
|
+
const void* src, size_t srcSize);
|
57
71
|
|
58
72
|
|
59
73
|
#endif /* ZSTD_DEC_BLOCK_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -19,34 +19,34 @@
|
|
19
19
|
/*-*******************************************************
|
20
20
|
* Dependencies
|
21
21
|
*********************************************************/
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
23
|
-
#include "zstd_internal.h" /*
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
|
+
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
|
24
24
|
|
25
25
|
|
26
26
|
|
27
27
|
/*-*******************************************************
|
28
28
|
* Constants
|
29
29
|
*********************************************************/
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
30
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
31
31
|
0, 1, 2, 3, 4, 5, 6, 7,
|
32
32
|
8, 9, 10, 11, 12, 13, 14, 15,
|
33
33
|
16, 18, 20, 22, 24, 28, 32, 40,
|
34
34
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
35
35
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
36
36
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
37
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
38
38
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
39
39
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
42
|
|
43
|
-
static const
|
43
|
+
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
47
47
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
48
48
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
49
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
50
50
|
3, 4, 5, 6, 7, 8, 9, 10,
|
51
51
|
11, 12, 13, 14, 15, 16, 17, 18,
|
52
52
|
19, 20, 21, 22, 23, 24, 25, 26,
|
@@ -73,12 +73,17 @@ static const U32 ML_base[MaxML+1] = {
|
|
73
73
|
|
74
74
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
75
75
|
|
76
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
78
|
+
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
|
79
|
+
|
76
80
|
typedef struct {
|
77
81
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
78
82
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
79
83
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
80
|
-
HUF_DTable hufTable[HUF_DTABLE_SIZE(
|
84
|
+
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
|
81
85
|
U32 rep[ZSTD_REP_NUM];
|
86
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
82
87
|
} ZSTD_entropyDTables_t;
|
83
88
|
|
84
89
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
@@ -95,6 +100,29 @@ typedef enum {
|
|
95
100
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
96
101
|
} ZSTD_dictUses_e;
|
97
102
|
|
103
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
104
|
+
typedef struct {
|
105
|
+
const ZSTD_DDict** ddictPtrTable;
|
106
|
+
size_t ddictPtrTableSize;
|
107
|
+
size_t ddictPtrCount;
|
108
|
+
} ZSTD_DDictHashSet;
|
109
|
+
|
110
|
+
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
|
111
|
+
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
|
112
|
+
#endif
|
113
|
+
|
114
|
+
#define ZSTD_LBMIN 64
|
115
|
+
#define ZSTD_LBMAX (128 << 10)
|
116
|
+
|
117
|
+
/* extra buffer, compensates when dst is not large enough to store litBuffer */
|
118
|
+
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
|
119
|
+
|
120
|
+
typedef enum {
|
121
|
+
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
|
122
|
+
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
|
123
|
+
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
|
124
|
+
} ZSTD_litLocation_e;
|
125
|
+
|
98
126
|
struct ZSTD_DCtx_s
|
99
127
|
{
|
100
128
|
const ZSTD_seqSymbol* LLTptr;
|
@@ -109,6 +137,7 @@ struct ZSTD_DCtx_s
|
|
109
137
|
const void* dictEnd; /* end of previous segment */
|
110
138
|
size_t expected;
|
111
139
|
ZSTD_frameHeader fParams;
|
140
|
+
U64 processedCSize;
|
112
141
|
U64 decodedSize;
|
113
142
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
114
143
|
ZSTD_dStage stage;
|
@@ -117,12 +146,16 @@ struct ZSTD_DCtx_s
|
|
117
146
|
XXH64_state_t xxhState;
|
118
147
|
size_t headerSize;
|
119
148
|
ZSTD_format_e format;
|
149
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
150
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
120
151
|
const BYTE* litPtr;
|
121
152
|
ZSTD_customMem customMem;
|
122
153
|
size_t litSize;
|
123
154
|
size_t rleSize;
|
124
155
|
size_t staticSize;
|
156
|
+
#if DYNAMIC_BMI2 != 0
|
125
157
|
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
158
|
+
#endif
|
126
159
|
|
127
160
|
/* dictionary */
|
128
161
|
ZSTD_DDict* ddictLocal;
|
@@ -130,6 +163,9 @@ struct ZSTD_DCtx_s
|
|
130
163
|
U32 dictID;
|
131
164
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
132
165
|
ZSTD_dictUses_e dictUses;
|
166
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
167
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
168
|
+
int disableHufAsm;
|
133
169
|
|
134
170
|
/* streaming */
|
135
171
|
ZSTD_dStreamStage streamStage;
|
@@ -142,17 +178,44 @@ struct ZSTD_DCtx_s
|
|
142
178
|
size_t outStart;
|
143
179
|
size_t outEnd;
|
144
180
|
size_t lhSize;
|
181
|
+
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
145
182
|
void* legacyContext;
|
146
183
|
U32 previousLegacyVersion;
|
147
184
|
U32 legacyVersion;
|
185
|
+
#endif
|
148
186
|
U32 hostageByte;
|
149
187
|
int noForwardProgress;
|
188
|
+
ZSTD_bufferMode_e outBufferMode;
|
189
|
+
ZSTD_outBuffer expectedOutBuffer;
|
150
190
|
|
151
191
|
/* workspace */
|
152
|
-
BYTE litBuffer
|
192
|
+
BYTE* litBuffer;
|
193
|
+
const BYTE* litBufferEnd;
|
194
|
+
ZSTD_litLocation_e litBufferLocation;
|
195
|
+
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
|
153
196
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
197
|
+
|
198
|
+
size_t oversizedDuration;
|
199
|
+
|
200
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
201
|
+
void const* dictContentBeginForFuzzing;
|
202
|
+
void const* dictContentEndForFuzzing;
|
203
|
+
#endif
|
204
|
+
|
205
|
+
/* Tracing */
|
206
|
+
#if ZSTD_TRACE
|
207
|
+
ZSTD_TraceCtx traceCtx;
|
208
|
+
#endif
|
154
209
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
155
210
|
|
211
|
+
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
|
212
|
+
#if DYNAMIC_BMI2 != 0
|
213
|
+
return dctx->bmi2;
|
214
|
+
#else
|
215
|
+
(void)dctx;
|
216
|
+
return 0;
|
217
|
+
#endif
|
218
|
+
}
|
156
219
|
|
157
220
|
/*-*******************************************************
|
158
221
|
* Shared internal functions
|
@@ -160,7 +223,7 @@ struct ZSTD_DCtx_s
|
|
160
223
|
|
161
224
|
/*! ZSTD_loadDEntropy() :
|
162
225
|
* dict : must point at beginning of a valid zstd dictionary.
|
163
|
-
* @return : size of entropy tables
|
226
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
164
227
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
165
228
|
const void* const dict, size_t const dictSize);
|
166
229
|
|
@@ -169,7 +232,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
169
232
|
* If yes, do nothing (continue on current segment).
|
170
233
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
171
234
|
* This function cannot fail. */
|
172
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
235
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
173
236
|
|
174
237
|
|
175
238
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -26,47 +26,65 @@
|
|
26
26
|
#include <string.h> /* memset */
|
27
27
|
#include <time.h> /* clock */
|
28
28
|
|
29
|
-
#include "mem.h" /* read */
|
30
|
-
#include "pool.h"
|
31
|
-
#include "threading.h"
|
32
|
-
#include "cover.h"
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
34
29
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
35
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
30
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
36
31
|
#endif
|
37
|
-
|
32
|
+
|
33
|
+
#include "../common/mem.h" /* read */
|
34
|
+
#include "../common/pool.h"
|
35
|
+
#include "../common/threading.h"
|
36
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
37
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
38
|
+
#include "../zdict.h"
|
39
|
+
#include "cover.h"
|
38
40
|
|
39
41
|
/*-*************************************
|
40
42
|
* Constants
|
41
43
|
***************************************/
|
44
|
+
/**
|
45
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
46
|
+
* on 64bit builds.
|
47
|
+
* For 32bit builds we choose 1 GB.
|
48
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
49
|
+
* contiguous buffer, so 1GB is already a high limit.
|
50
|
+
*/
|
42
51
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
43
|
-
#define
|
52
|
+
#define COVER_DEFAULT_SPLITPOINT 1.0
|
44
53
|
|
45
54
|
/*-*************************************
|
46
55
|
* Console display
|
47
56
|
***************************************/
|
48
|
-
|
57
|
+
#ifndef LOCALDISPLAYLEVEL
|
58
|
+
static int g_displayLevel = 0;
|
59
|
+
#endif
|
60
|
+
#undef DISPLAY
|
49
61
|
#define DISPLAY(...) \
|
50
62
|
{ \
|
51
63
|
fprintf(stderr, __VA_ARGS__); \
|
52
64
|
fflush(stderr); \
|
53
65
|
}
|
66
|
+
#undef LOCALDISPLAYLEVEL
|
54
67
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
55
68
|
if (displayLevel >= l) { \
|
56
69
|
DISPLAY(__VA_ARGS__); \
|
57
70
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
71
|
+
#undef DISPLAYLEVEL
|
58
72
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
59
73
|
|
74
|
+
#ifndef LOCALDISPLAYUPDATE
|
75
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
76
|
+
static clock_t g_time = 0;
|
77
|
+
#endif
|
78
|
+
#undef LOCALDISPLAYUPDATE
|
60
79
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
61
80
|
if (displayLevel >= l) { \
|
62
|
-
if ((clock() - g_time >
|
81
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
63
82
|
g_time = clock(); \
|
64
83
|
DISPLAY(__VA_ARGS__); \
|
65
84
|
} \
|
66
85
|
}
|
86
|
+
#undef DISPLAYUPDATE
|
67
87
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
68
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
69
|
-
static clock_t g_time = 0;
|
70
88
|
|
71
89
|
/*-*************************************
|
72
90
|
* Hash table
|
@@ -120,9 +138,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
|
|
120
138
|
/**
|
121
139
|
* Internal hash function
|
122
140
|
*/
|
123
|
-
static const U32
|
141
|
+
static const U32 COVER_prime4bytes = 2654435761U;
|
124
142
|
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
|
125
|
-
return (key *
|
143
|
+
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
|
126
144
|
}
|
127
145
|
|
128
146
|
/**
|
@@ -215,7 +233,7 @@ typedef struct {
|
|
215
233
|
} COVER_ctx_t;
|
216
234
|
|
217
235
|
/* We need a global context for qsort... */
|
218
|
-
static COVER_ctx_t *
|
236
|
+
static COVER_ctx_t *g_coverCtx = NULL;
|
219
237
|
|
220
238
|
/*-*************************************
|
221
239
|
* Helper functions
|
@@ -258,11 +276,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
258
276
|
|
259
277
|
/**
|
260
278
|
* Same as COVER_cmp() except ties are broken by pointer value
|
261
|
-
* NOTE:
|
279
|
+
* NOTE: g_coverCtx must be set to call this function. A global is required because
|
262
280
|
* qsort doesn't take an opaque pointer.
|
263
281
|
*/
|
264
|
-
static int COVER_strict_cmp(const void *lp, const void *rp) {
|
265
|
-
int result = COVER_cmp(
|
282
|
+
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
|
283
|
+
int result = COVER_cmp(g_coverCtx, lp, rp);
|
266
284
|
if (result == 0) {
|
267
285
|
result = lp < rp ? -1 : 1;
|
268
286
|
}
|
@@ -271,8 +289,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
271
289
|
/**
|
272
290
|
* Faster version for d <= 8.
|
273
291
|
*/
|
274
|
-
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
275
|
-
int result = COVER_cmp8(
|
292
|
+
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
|
293
|
+
int result = COVER_cmp8(g_coverCtx, lp, rp);
|
276
294
|
if (result == 0) {
|
277
295
|
result = lp < rp ? -1 : 1;
|
278
296
|
}
|
@@ -524,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
524
542
|
|
525
543
|
/**
|
526
544
|
* Prepare a context for dictionary building.
|
527
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
545
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
528
546
|
* times.
|
529
547
|
* Returns 0 on success or error code on error.
|
530
548
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
@@ -603,7 +621,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
603
621
|
/* qsort doesn't take an opaque pointer, so pass as a global.
|
604
622
|
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
|
605
623
|
*/
|
606
|
-
|
624
|
+
g_coverCtx = ctx;
|
607
625
|
#if defined(__OpenBSD__)
|
608
626
|
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
609
627
|
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
@@ -629,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
629
647
|
|
630
648
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
631
649
|
{
|
632
|
-
const double ratio = (double)nbDmers / maxDictSize;
|
650
|
+
const double ratio = (double)nbDmers / (double)maxDictSize;
|
633
651
|
if (ratio >= 10) {
|
634
652
|
return;
|
635
653
|
}
|
@@ -725,7 +743,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
725
743
|
COVER_map_t activeDmers;
|
726
744
|
parameters.splitPoint = 1.0;
|
727
745
|
/* Initialize global data */
|
728
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
746
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
729
747
|
/* Checks */
|
730
748
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
731
749
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
@@ -933,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
933
951
|
}
|
934
952
|
}
|
935
953
|
|
954
|
+
static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
|
955
|
+
{
|
956
|
+
COVER_dictSelection_t ds;
|
957
|
+
ds.dictContent = buf;
|
958
|
+
ds.dictSize = s;
|
959
|
+
ds.totalCompressedSize = csz;
|
960
|
+
return ds;
|
961
|
+
}
|
962
|
+
|
936
963
|
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
937
|
-
|
938
|
-
return selection;
|
964
|
+
return setDictSelection(NULL, 0, error);
|
939
965
|
}
|
940
966
|
|
941
967
|
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
@@ -946,7 +972,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
946
972
|
free(selection.dictContent);
|
947
973
|
}
|
948
974
|
|
949
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
975
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
950
976
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
951
977
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
952
978
|
|
@@ -954,8 +980,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
954
980
|
size_t largestCompressed = 0;
|
955
981
|
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
956
982
|
|
957
|
-
BYTE * largestDictbuffer = (BYTE *)malloc(
|
958
|
-
BYTE * candidateDictBuffer = (BYTE *)malloc(
|
983
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
|
984
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
|
959
985
|
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
960
986
|
|
961
987
|
if (!largestDictbuffer || !candidateDictBuffer) {
|
@@ -967,7 +993,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
967
993
|
/* Initial dictionary size and compressed size */
|
968
994
|
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
969
995
|
dictContentSize = ZDICT_finalizeDictionary(
|
970
|
-
largestDictbuffer,
|
996
|
+
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
|
971
997
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
972
998
|
|
973
999
|
if (ZDICT_isError(dictContentSize)) {
|
@@ -988,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
988
1014
|
}
|
989
1015
|
|
990
1016
|
if (params.shrinkDict == 0) {
|
991
|
-
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
992
1017
|
free(candidateDictBuffer);
|
993
|
-
return
|
1018
|
+
return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
|
994
1019
|
}
|
995
1020
|
|
996
1021
|
largestDict = dictContentSize;
|
@@ -1001,7 +1026,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
1001
1026
|
while (dictContentSize < largestDict) {
|
1002
1027
|
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
1003
1028
|
dictContentSize = ZDICT_finalizeDictionary(
|
1004
|
-
candidateDictBuffer,
|
1029
|
+
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
|
1005
1030
|
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
1006
1031
|
|
1007
1032
|
if (ZDICT_isError(dictContentSize)) {
|
@@ -1022,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
1022
1047
|
return COVER_dictSelectionError(totalCompressedSize);
|
1023
1048
|
}
|
1024
1049
|
|
1025
|
-
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
1026
|
-
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
1050
|
+
if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
|
1027
1051
|
free(largestDictbuffer);
|
1028
|
-
return
|
1052
|
+
return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
|
1029
1053
|
}
|
1030
1054
|
dictContentSize *= 2;
|
1031
1055
|
}
|
1032
1056
|
dictContentSize = largestDict;
|
1033
1057
|
totalCompressedSize = largestCompressed;
|
1034
|
-
|
1035
|
-
|
1036
|
-
free(candidateDictBuffer);
|
1037
|
-
return selection;
|
1038
|
-
}
|
1058
|
+
free(candidateDictBuffer);
|
1059
|
+
return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
|
1039
1060
|
}
|
1040
1061
|
|
1041
1062
|
/**
|
@@ -1053,18 +1074,19 @@ typedef struct COVER_tryParameters_data_s {
|
|
1053
1074
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
1054
1075
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
1055
1076
|
*/
|
1056
|
-
static void COVER_tryParameters(void *opaque)
|
1077
|
+
static void COVER_tryParameters(void *opaque)
|
1078
|
+
{
|
1057
1079
|
/* Save parameters as local variables */
|
1058
|
-
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t
|
1080
|
+
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
|
1059
1081
|
const COVER_ctx_t *const ctx = data->ctx;
|
1060
1082
|
const ZDICT_cover_params_t parameters = data->parameters;
|
1061
1083
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
1062
1084
|
size_t totalCompressedSize = ERROR(GENERIC);
|
1063
1085
|
/* Allocate space for hash table, dict, and freqs */
|
1064
1086
|
COVER_map_t activeDmers;
|
1065
|
-
BYTE
|
1087
|
+
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
|
1066
1088
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
1067
|
-
U32
|
1089
|
+
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
|
1068
1090
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
1069
1091
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
1070
1092
|
goto _cleanup;
|
@@ -1079,7 +1101,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
1079
1101
|
{
|
1080
1102
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
1081
1103
|
dictBufferCapacity, parameters);
|
1082
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
1104
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
1083
1105
|
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
1084
1106
|
totalCompressedSize);
|
1085
1107
|
|
@@ -1094,19 +1116,18 @@ _cleanup:
|
|
1094
1116
|
free(data);
|
1095
1117
|
COVER_map_destroy(&activeDmers);
|
1096
1118
|
COVER_dictSelectionFree(selection);
|
1097
|
-
|
1098
|
-
free(freqs);
|
1099
|
-
}
|
1119
|
+
free(freqs);
|
1100
1120
|
}
|
1101
1121
|
|
1102
1122
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
1103
|
-
void
|
1104
|
-
const size_t
|
1105
|
-
ZDICT_cover_params_t
|
1123
|
+
void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
|
1124
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
1125
|
+
ZDICT_cover_params_t* parameters)
|
1126
|
+
{
|
1106
1127
|
/* constants */
|
1107
1128
|
const unsigned nbThreads = parameters->nbThreads;
|
1108
1129
|
const double splitPoint =
|
1109
|
-
parameters->splitPoint <= 0.0 ?
|
1130
|
+
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
1110
1131
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
1111
1132
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
1112
1133
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -1,15 +1,26 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
12
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
13
|
+
#endif
|
14
|
+
|
1
15
|
#include <stdio.h> /* fprintf */
|
2
16
|
#include <stdlib.h> /* malloc, free, qsort */
|
3
17
|
#include <string.h> /* memset */
|
4
18
|
#include <time.h> /* clock */
|
5
|
-
#include "mem.h" /* read */
|
6
|
-
#include "pool.h"
|
7
|
-
#include "threading.h"
|
8
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
9
|
-
#
|
10
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
11
|
-
#endif
|
12
|
-
#include "zdict.h"
|
19
|
+
#include "../common/mem.h" /* read */
|
20
|
+
#include "../common/pool.h"
|
21
|
+
#include "../common/threading.h"
|
22
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
23
|
+
#include "../zdict.h"
|
13
24
|
|
14
25
|
/**
|
15
26
|
* COVER_best_t is used for two purposes:
|
@@ -142,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
142
153
|
* smallest dictionary within a specified regression of the compressed size
|
143
154
|
* from the largest dictionary.
|
144
155
|
*/
|
145
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
156
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
146
157
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
147
158
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
@@ -1576,7 +1576,7 @@ note:
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
1578
1578
|
|
1579
|
-
/* Set the sorted order of
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
1582
1582
|
if(0 <= i) {
|